diff options
Diffstat (limited to 'fs')
112 files changed, 1655 insertions, 1595 deletions
diff --git a/fs/9p/error.c b/fs/9p/error.c index ae91555c155..0d7fa4e0881 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -83,6 +83,7 @@ int v9fs_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ + errstr[len] = 0; printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, errstr); errno = 1; diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 27507201f9e..a9b6301a04f 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/idr.h> +#include <asm/semaphore.h> #include "debug.h" #include "v9fs.h" @@ -84,6 +85,7 @@ struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid) new->iounit = 0; new->rdir_pos = 0; new->rdir_fcall = NULL; + init_MUTEX(&new->lock); INIT_LIST_HEAD(&new->list); return new; @@ -102,11 +104,11 @@ void v9fs_fid_destroy(struct v9fs_fid *fid) } /** - * v9fs_fid_lookup - retrieve the right fid from a particular dentry + * v9fs_fid_lookup - return a locked fid from a dentry * @dentry: dentry to look for fid in - * @type: intent of lookup (operation or traversal) * - * find a fid in the dentry + * find a fid in the dentry, obtain its semaphore and return a reference to it. + * code calling lookup is responsible for releasing lock * * TODO: only match fids that have the same uid as current user * @@ -124,7 +126,68 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) if (!return_fid) { dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n"); + return_fid = ERR_PTR(-EBADF); } + if(down_interruptible(&return_fid->lock)) + return ERR_PTR(-EINTR); + return return_fid; } + +/** + * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and release it + * @dentry: dentry to look for fid in + * + * find a fid in the dentry and then clone to a new private fid + * + * TODO: only match fids that have the same uid as current user + * + */ + +struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); + struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF); + struct v9fs_fcall *fcall = NULL; + int fid, err; + + base_fid = v9fs_fid_lookup(dentry); + + if(IS_ERR(base_fid)) + return base_fid; + + if(base_fid) { /* clone fid */ + fid = v9fs_get_idpool(&v9ses->fidpool); + if (fid < 0) { + eprintk(KERN_WARNING, "newfid fails!\n"); + new_fid = ERR_PTR(-ENOSPC); + goto Release_Fid; + } + + err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall); + if (err < 0) { + dprintk(DEBUG_ERROR, "clone walk didn't work\n"); + v9fs_put_idpool(fid, &v9ses->fidpool); + new_fid = ERR_PTR(err); + goto Free_Fcall; + } + new_fid = v9fs_fid_create(v9ses, fid); + if (new_fid == NULL) { + dprintk(DEBUG_ERROR, "out of memory\n"); + new_fid = ERR_PTR(-ENOMEM); + } +Free_Fcall: + kfree(fcall); + } + +Release_Fid: + up(&base_fid->lock); + return new_fid; +} + +void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid) +{ + v9fs_t_clunk(v9ses, fid->fid); + v9fs_fid_destroy(fid); +} diff --git a/fs/9p/fid.h b/fs/9p/fid.h index aa974d6875c..48fc170c26c 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -30,6 +30,8 @@ struct v9fs_fid { struct list_head list; /* list of fids associated with a dentry */ struct list_head active; /* XXX - debug */ + struct semaphore lock; + u32 fid; unsigned char fidopen; /* set when fid is opened */ unsigned char fidclunked; /* set when fid has already been clunked */ @@ -55,3 +57,6 @@ struct v9fs_fid *v9fs_fid_get_created(struct dentry *); void v9fs_fid_destroy(struct v9fs_fid *fid); struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid); int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry); +struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry); +void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid); + diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 944273c3dbf..147ceef8e53 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -132,8 +132,10 @@ int v9fs_mux_global_init(void) v9fs_mux_poll_tasks[i].task = NULL; v9fs_mux_wq = create_workqueue("v9fs"); - if (!v9fs_mux_wq) + if (!v9fs_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); return -ENOMEM; + } return 0; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 0b96fae8b47..d9b561ba5e5 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -457,14 +457,19 @@ static int __init init_v9fs(void) v9fs_error_init(); - printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); + printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); ret = v9fs_mux_global_init(); - if (!ret) + if (ret) { + printk(KERN_WARNING "v9fs: starting mux failed\n"); return ret; + } ret = register_filesystem(&v9fs_fs_type); - if (!ret) + if (ret) { + printk(KERN_WARNING "v9fs: registering file system failed\n"); v9fs_mux_global_exit(); + } + return ret; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index e86a0715128..9f17b0cacdd 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -55,53 +55,22 @@ int v9fs_file_open(struct inode *inode, struct file *file) struct v9fs_fid *vfid; struct v9fs_fcall *fcall = NULL; int omode; - int fid = V9FS_NOFID; int err; dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); - vfid = v9fs_fid_lookup(file->f_path.dentry); - if (!vfid) { - dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n"); - return -EBADF; - } - - fid = v9fs_get_idpool(&v9ses->fidpool); - if (fid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - return -ENOSPC; - } + vfid = v9fs_fid_clone(file->f_path.dentry); + if (IS_ERR(vfid)) + return PTR_ERR(vfid); - err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall); - if (err < 0) { - dprintk(DEBUG_ERROR, "rewalk didn't work\n"); - if (fcall && fcall->id == RWALK) - goto clunk_fid; - else { - v9fs_put_idpool(fid, &v9ses->fidpool); - goto free_fcall; - } - } - kfree(fcall); - - /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ - /* translate open mode appropriately */ omode = v9fs_uflags2omode(file->f_flags); - err = v9fs_t_open(v9ses, fid, omode, &fcall); + err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall); if (err < 0) { PRINT_FCALL_ERROR("open failed", fcall); - goto clunk_fid; - } - - vfid = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); - if (vfid == NULL) { - dprintk(DEBUG_ERROR, "out of memory\n"); - err = -ENOMEM; - goto clunk_fid; + goto Clunk_Fid; } file->private_data = vfid; - vfid->fid = fid; vfid->fidopen = 1; vfid->fidclunked = 0; vfid->iounit = fcall->params.ropen.iounit; @@ -112,10 +81,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) return 0; -clunk_fid: - v9fs_t_clunk(v9ses, fid); - -free_fcall: +Clunk_Fid: + v9fs_fid_clunk(v9ses, vfid); kfree(fcall); return err; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 18f26cdfd88..9109ba1d696 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -416,12 +416,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); v9fid = v9fs_fid_lookup(file); - - if (!v9fid) { - dprintk(DEBUG_ERROR, - "no v9fs_fid\n"); - return -EBADF; - } + if(IS_ERR(v9fid)) + return PTR_ERR(v9fid); fid = v9fid->fid; if (fid < 0) { @@ -433,11 +429,13 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) result = v9fs_t_remove(v9ses, fid, &fcall); if (result < 0) { PRINT_FCALL_ERROR("remove fails", fcall); + goto Error; } v9fs_put_idpool(fid, &v9ses->fidpool); v9fs_fid_destroy(v9fid); +Error: kfree(fcall); return result; } @@ -473,9 +471,13 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, inode = NULL; vfid = NULL; v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_lookup(dentry->d_parent); - perm = unixmode2p9mode(v9ses, mode); + dfid = v9fs_fid_clone(dentry->d_parent); + if(IS_ERR(dfid)) { + err = PTR_ERR(dfid); + goto error; + } + perm = unixmode2p9mode(v9ses, mode); if (nd && nd->flags & LOOKUP_OPEN) flags = nd->intent.open.flags - 1; else @@ -485,9 +487,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit); if (err) - goto error; + goto clunk_dfid; vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); + v9fs_fid_clunk(v9ses, dfid); if (IS_ERR(vfid)) { err = PTR_ERR(vfid); vfid = NULL; @@ -525,6 +528,9 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, return 0; +clunk_dfid: + v9fs_fid_clunk(v9ses, dfid); + error: if (vfid) v9fs_fid_destroy(vfid); @@ -551,7 +557,12 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode = NULL; vfid = NULL; v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_lookup(dentry->d_parent); + dfid = v9fs_fid_clone(dentry->d_parent); + if(IS_ERR(dfid)) { + err = PTR_ERR(dfid); + goto error; + } + perm = unixmode2p9mode(v9ses, mode | S_IFDIR); err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, @@ -559,37 +570,36 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) { dprintk(DEBUG_ERROR, "create error %d\n", err); - goto error; - } - - err = v9fs_t_clunk(v9ses, fid); - if (err) { - dprintk(DEBUG_ERROR, "clunk error %d\n", err); - goto error; + goto clean_up_dfid; } vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); if (IS_ERR(vfid)) { err = PTR_ERR(vfid); vfid = NULL; - goto error; + goto clean_up_dfid; } + v9fs_fid_clunk(v9ses, dfid); inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; - goto error; + goto clean_up_fids; } dentry->d_op = &v9fs_dentry_operations; d_instantiate(dentry, inode); return 0; -error: +clean_up_fids: if (vfid) v9fs_fid_destroy(vfid); +clean_up_dfid: + v9fs_fid_clunk(v9ses, dfid); + +error: return err; } @@ -622,28 +632,23 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &v9fs_dentry_operations; dirfid = v9fs_fid_lookup(dentry->d_parent); - if (!dirfid) { - dprintk(DEBUG_ERROR, "no dirfid\n"); - return ERR_PTR(-EINVAL); - } + if(IS_ERR(dirfid)) + return ERR_PTR(PTR_ERR(dirfid)); dirfidnum = dirfid->fid; - if (dirfidnum < 0) { - dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n", - dir, dir->i_ino); - return ERR_PTR(-EBADF); - } - newfid = v9fs_get_idpool(&v9ses->fidpool); if (newfid < 0) { eprintk(KERN_WARNING, "newfid fails!\n"); - return ERR_PTR(-ENOSPC); + result = -ENOSPC; + goto Release_Dirfid; } result = v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name, &fcall); + up(&dirfid->lock); + if (result < 0) { if (fcall && fcall->id == RWALK) v9fs_t_clunk(v9ses, newfid); @@ -701,8 +706,12 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, return NULL; - FreeFcall: +Release_Dirfid: + up(&dirfid->lock); + +FreeFcall: kfree(fcall); + return ERR_PTR(result); } @@ -746,10 +755,8 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); - struct v9fs_fid *olddirfid = - v9fs_fid_lookup(old_dentry->d_parent); - struct v9fs_fid *newdirfid = - v9fs_fid_lookup(new_dentry->d_parent); + struct v9fs_fid *olddirfid; + struct v9fs_fid *newdirfid; struct v9fs_wstat wstat; struct v9fs_fcall *fcall = NULL; int fid = -1; @@ -759,16 +766,26 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, dprintk(DEBUG_VFS, "\n"); - if ((!oldfid) || (!olddirfid) || (!newdirfid)) { - dprintk(DEBUG_ERROR, "problem with arguments\n"); - return -EBADF; + if(IS_ERR(oldfid)) + return PTR_ERR(oldfid); + + olddirfid = v9fs_fid_clone(old_dentry->d_parent); + if(IS_ERR(olddirfid)) { + retval = PTR_ERR(olddirfid); + goto Release_lock; + } + + newdirfid = v9fs_fid_clone(new_dentry->d_parent); + if(IS_ERR(newdirfid)) { + retval = PTR_ERR(newdirfid); + goto Clunk_olddir; } /* 9P can only handle file rename in the same directory */ if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); - retval = -EPERM; - goto FreeFcallnBail; + retval = -EXDEV; + goto Clunk_newdir; } fid = oldfid->fid; @@ -779,7 +796,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, dprintk(DEBUG_ERROR, "no fid for old file #%lu\n", old_inode->i_ino); retval = -EBADF; - goto FreeFcallnBail; + goto Clunk_newdir; } v9fs_blank_wstat(&wstat); @@ -788,11 +805,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); - FreeFcallnBail: if (retval < 0) PRINT_FCALL_ERROR("wstat error", fcall); kfree(fcall); + +Clunk_newdir: + v9fs_fid_clunk(v9ses, newdirfid); + +Clunk_olddir: + v9fs_fid_clunk(v9ses, olddirfid); + +Release_lock: + up(&oldfid->lock); + return retval; } @@ -810,15 +836,12 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, { struct v9fs_fcall *fcall = NULL; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry); + struct v9fs_fid *fid = v9fs_fid_clone(dentry); int err = -EPERM; dprintk(DEBUG_VFS, "dentry: %p\n", dentry); - if (!fid) { - dprintk(DEBUG_ERROR, - "couldn't find fid associated with dentry\n"); - return -EBADF; - } + if(IS_ERR(fid)) + return PTR_ERR(fid); err = v9fs_t_stat(v9ses, fid->fid, &fcall); @@ -831,6 +854,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, } kfree(fcall); + v9fs_fid_clunk(v9ses, fid); return err; } @@ -844,18 +868,14 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry); + struct v9fs_fid *fid = v9fs_fid_clone(dentry); struct v9fs_fcall *fcall = NULL; struct v9fs_wstat wstat; int res = -EPERM; dprintk(DEBUG_VFS, "\n"); - - if (!fid) { - dprintk(DEBUG_ERROR, - "Couldn't find fid associated with dentry\n"); - return -EBADF; - } + if(IS_ERR(fid)) + return PTR_ERR(fid); v9fs_blank_wstat(&wstat); if (iattr->ia_valid & ATTR_MODE) @@ -887,6 +907,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) if (res >= 0) res = inode_setattr(dentry->d_inode, iattr); + v9fs_fid_clunk(v9ses, fid); return res; } @@ -987,18 +1008,15 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) struct v9fs_fcall *fcall = NULL; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_lookup(dentry); + struct v9fs_fid *fid = v9fs_fid_clone(dentry); - if (!fid) { - dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n"); - retval = -EBADF; - goto FreeFcall; - } + if(IS_ERR(fid)) + return PTR_ERR(fid); if (!v9ses->extended) { retval = -EBADF; dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeFcall; + goto ClunkFid; } dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name); @@ -1009,8 +1027,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) goto FreeFcall; } - if (!fcall) - return -EIO; + if (!fcall) { + retval = -EIO; + goto ClunkFid; + } if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { retval = -EINVAL; @@ -1028,9 +1048,12 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) fcall->params.rstat.stat.extension.str, buffer); retval = buflen; - FreeFcall: +FreeFcall: kfree(fcall); +ClunkFid: + v9fs_fid_clunk(v9ses, fid); + return retval; } @@ -1123,52 +1146,58 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int err; u32 fid, perm; struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid; - struct inode *inode; + struct v9fs_fid *dfid, *vfid = NULL; + struct inode *inode = NULL; - inode = NULL; - vfid = NULL; v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_lookup(dentry->d_parent); - perm = unixmode2p9mode(v9ses, mode); - if (!v9ses->extended) { dprintk(DEBUG_ERROR, "not extended\n"); return -EPERM; } + dfid = v9fs_fid_clone(dentry->d_parent); + if(IS_ERR(dfid)) { + err = PTR_ERR(dfid); + goto error; + } + + perm = unixmode2p9mode(v9ses, mode); + err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL); if (err) - goto error; + goto clunk_dfid; err = v9fs_t_clunk(v9ses, fid); if (err) - goto error; + goto clunk_dfid; vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); if (IS_ERR(vfid)) { err = PTR_ERR(vfid); vfid = NULL; - goto error; + goto clunk_dfid; } inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; - goto error; + goto free_vfid; } dentry->d_op = &v9fs_dentry_operations; d_instantiate(dentry, inode); return 0; -error: - if (vfid) - v9fs_fid_destroy(vfid); +free_vfid: + v9fs_fid_destroy(vfid); + +clunk_dfid: + v9fs_fid_clunk(v9ses, dfid); +error: return err; } @@ -1209,26 +1238,29 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); struct v9fs_fid *oldfid; char *name; dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); - oldfid = v9fs_fid_lookup(old_dentry); - if (!oldfid) { - dprintk(DEBUG_ERROR, "can't find oldfid\n"); - return -EPERM; - } + oldfid = v9fs_fid_clone(old_dentry); + if(IS_ERR(oldfid)) + return PTR_ERR(oldfid); name = __getname(); - if (unlikely(!name)) - return -ENOMEM; + if (unlikely(!name)) { + retval = -ENOMEM; + goto clunk_fid; + } sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); __putname(name); +clunk_fid: + v9fs_fid_clunk(v9ses, oldfid); return retval; } @@ -298,17 +298,23 @@ static void wait_for_all_aios(struct kioctx *ctx) struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); + spin_lock_irq(&ctx->ctx_lock); if (!ctx->reqs_active) - return; + goto out; add_wait_queue(&ctx->wait, &wait); set_task_state(tsk, TASK_UNINTERRUPTIBLE); while (ctx->reqs_active) { + spin_unlock_irq(&ctx->ctx_lock); schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + spin_lock_irq(&ctx->ctx_lock); } __set_task_state(tsk, TASK_RUNNING); remove_wait_queue(&ctx->wait, &wait); + +out: + spin_unlock_irq(&ctx->ctx_lock); } /* wait_on_sync_kiocb: @@ -424,7 +430,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { list_add(&req->ki_list, &ctx->active_reqs); - get_ioctx(ctx); ctx->reqs_active++; okay = 1; } @@ -536,8 +541,6 @@ int fastcall aio_put_req(struct kiocb *req) spin_lock_irq(&ctx->ctx_lock); ret = __aio_put_req(ctx, req); spin_unlock_irq(&ctx->ctx_lock); - if (ret) - put_ioctx(ctx); return ret; } @@ -779,8 +782,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) */ iocb->ki_users++; /* grab extra reference */ aio_run_iocb(iocb); - if (__aio_put_req(ctx, iocb)) /* drop extra ref */ - put_ioctx(ctx); + __aio_put_req(ctx, iocb); } if (!list_empty(&ctx->run_list)) return 1; @@ -997,14 +999,10 @@ put_rq: /* everything turned out well, dispose of the aiocb. */ ret = __aio_put_req(ctx, iocb); - spin_unlock_irqrestore(&ctx->ctx_lock, flags); - if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - if (ret) - put_ioctx(ctx); - + spin_unlock_irqrestore(&ctx->ctx_lock, flags); return ret; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7cb28720f90..669dbe5b031 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -682,6 +682,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) goto out_free_interp; + + /* + * If the binary is not readable then enforce + * mm->dumpable = 0 regardless of the interpreter's + * permissions. + */ + if (file_permission(interpreter, MAY_READ) < 0) + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); if (retval != BINPRM_BUF_SIZE) { @@ -1178,6 +1187,10 @@ static int dump_seek(struct file *file, loff_t off) */ static int maydump(struct vm_area_struct *vma) { + /* The vma can be set up to tell us the answer directly. */ + if (vma->vm_flags & VM_ALWAYSDUMP) + return 1; + /* Do not dump I/O mapped devices or special mappings */ if (vma->vm_flags & (VM_IO | VM_RESERVED)) return 0; @@ -1424,6 +1437,32 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) return sz; } +static struct vm_area_struct *first_vma(struct task_struct *tsk, + struct vm_area_struct *gate_vma) +{ + struct vm_area_struct *ret = tsk->mm->mmap; + + if (ret) + return ret; + return gate_vma; +} +/* + * Helper function for iterating across a vma list. It ensures that the caller + * will visit `gate_vma' prior to terminating the search. + */ +static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, + struct vm_area_struct *gate_vma) +{ + struct vm_area_struct *ret; + + ret = this_vma->vm_next; + if (ret) + return ret; + if (this_vma == gate_vma) + return NULL; + return gate_vma; +} + /* * Actual dumper * @@ -1439,7 +1478,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) int segs; size_t size = 0; int i; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; @@ -1525,6 +1564,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) segs += ELF_CORE_EXTRA_PHDRS; #endif + gate_vma = get_gate_vma(current); + if (gate_vma != NULL) + segs++; + /* Set up header */ fill_elf_header(elf, segs + 1); /* including notes section */ @@ -1592,7 +1635,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); /* Write program headers for segments dump */ - for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { + for (vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) { struct elf_phdr phdr; size_t sz; @@ -1641,7 +1685,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) /* Align to page */ DUMP_SEEK(dataoff - foffset); - for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { + for (vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) { unsigned long addr; if (!maydump(vma)) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6e6d4568d54..a4d933a5120 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -234,6 +234,14 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, goto error; } + /* + * If the binary is not readable then enforce + * mm->dumpable = 0 regardless of the interpreter's + * permissions. + */ + if (file_permission(interpreter, MAY_READ) < 0) + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); if (retval < 0) diff --git a/fs/block_dev.c b/fs/block_dev.c index 1715d6b5f41..fc7028b685f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -129,6 +129,46 @@ blkdev_get_block(struct inode *inode, sector_t iblock, return 0; } +static int +blkdev_get_blocks(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + sector_t end_block = max_block(I_BDEV(inode)); + unsigned long max_blocks = bh->b_size >> inode->i_blkbits; + + if ((iblock + max_blocks) > end_block) { + max_blocks = end_block - iblock; + if ((long)max_blocks <= 0) { + if (create) + return -EIO; /* write fully beyond EOF */ + /* + * It is a read which is fully beyond EOF. We return + * a !buffer_mapped buffer + */ + max_blocks = 0; + } + } + + bh->b_bdev = I_BDEV(inode); + bh->b_blocknr = iblock; + bh->b_size = max_blocks << inode->i_blkbits; + if (max_blocks) + set_buffer_mapped(bh); + return 0; +} + +static ssize_t +blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode), + iov, offset, nr_segs, blkdev_get_blocks, NULL); +} + +#if 0 static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error) { struct kiocb *iocb = bio->bi_private; @@ -146,7 +186,7 @@ static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error) iocb->ki_nbytes = -EIO; if (atomic_dec_and_test(bio_count)) { - if (iocb->ki_nbytes < 0) + if ((long)iocb->ki_nbytes < 0) aio_complete(iocb, iocb->ki_nbytes, 0); else aio_complete(iocb, iocb->ki_left, 0); @@ -190,6 +230,12 @@ static struct page *blk_get_page(unsigned long addr, size_t count, int rw, return pvec->page[pvec->idx++]; } +/* return a page back to pvec array */ +static void blk_unget_page(struct page *page, struct pvec *pvec) +{ + pvec->page[--pvec->idx] = page; +} + static ssize_t blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) @@ -278,6 +324,8 @@ same_bio: count = min(count, nbytes); goto same_bio; } + } else { + blk_unget_page(page, &pvec); } /* bio is ready, submit it */ @@ -315,6 +363,7 @@ backout: return PTR_ERR(page); goto completion; } +#endif static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { @@ -411,7 +460,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - mutex_init(&bdev->bd_mount_mutex); + sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS diff --git a/fs/buffer.c b/fs/buffer.c index 263f88e4dff..1ad674fd348 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -180,7 +180,7 @@ int fsync_bdev(struct block_device *bdev) * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_mutex to make sure no new mounts + * This takes the block device bd_mount_sem to make sure no new mounts * happen on bdev until thaw_bdev() is called. * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. @@ -189,7 +189,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) { struct super_block *sb; - mutex_lock(&bdev->bd_mount_mutex); + down(&bdev->bd_mount_sem); sb = get_super(bdev); if (sb && !(sb->s_flags & MS_RDONLY)) { sb->s_frozen = SB_FREEZE_WRITE; @@ -231,7 +231,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) drop_super(sb); } - mutex_unlock(&bdev->bd_mount_mutex); + up(&bdev->bd_mount_sem); } EXPORT_SYMBOL(thaw_bdev); @@ -2834,7 +2834,7 @@ int try_to_free_buffers(struct page *page) int ret = 0; BUG_ON(!PageLocked(page)); - if (PageDirty(page) || PageWriteback(page)) + if (PageWriteback(page)) return 0; if (mapping == NULL) { /* can this still happen? */ @@ -2844,6 +2844,23 @@ int try_to_free_buffers(struct page *page) spin_lock(&mapping->private_lock); ret = drop_buffers(page, &buffers_to_free); + + /* + * If the filesystem writes its buffers by hand (eg ext3) + * then we can have clean buffers against a dirty page. We + * clean the page here; otherwise the VM will never notice + * that the filesystem did any IO at all. + * + * Also, during truncate, discard_buffer will have marked all + * the page's buffers clean. We discover that here and clean + * the page also. + * + * private_lock must be held over this entire operation in order + * to synchronise against __set_page_dirty_buffers and prevent the + * dirty bit from being lost. + */ + if (ret) + cancel_dirty_page(page, PAGE_CACHE_SIZE); spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 3539d6ef961..85e3850bf2c 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,9 @@ +Version 1.47 +------------ +Fix oops in list_del during mount caused by unaligned string. +Seek to SEEK_END forces check for update of file size for non-cached +files. + Version 1.46 ------------ Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps. diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 96abeb73897..6017c465440 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -143,8 +143,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { - buf += sprintf("\nentry for %s not fully displayed\n\t", - ses->serverName); + buf += sprintf(buf, "\nentry for %s not fully " + "displayed\n\t", ses->serverName); } else { length = diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 10c90294cd1..93ef09971d2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -511,7 +511,15 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) { /* origin == SEEK_END => we must revalidate the cached file length */ if (origin == SEEK_END) { - int retval = cifs_revalidate(file->f_path.dentry); + int retval; + + /* some applications poll for the file length in this strange + way so we must seek to end on non-oplocked files by + setting the revalidate time to zero */ + if(file->f_path.dentry->d_inode) + CIFS_I(file->f_path.dentry->d_inode)->time = 0; + + retval = cifs_revalidate(file->f_path.dentry); if (retval < 0) return (loff_t)retval; } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index a243f779b36..8aa66dcf13b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.46" +#define CIFS_VERSION "1.47" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8a49b2e77d3..e9dcf5ee29a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1146,7 +1146,7 @@ static int cifs_writepages(struct address_space *mapping, pgoff_t end; pgoff_t index; int range_whole = 0; - struct kvec iov[32]; + struct kvec * iov; int len; int n_iov = 0; pgoff_t next; @@ -1171,15 +1171,21 @@ static int cifs_writepages(struct address_space *mapping, if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) if(cifs_sb->tcon->ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - if(!experimEnabled) + if(!experimEnabled) return generic_writepages(mapping, wbc); + iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); + if(iov == NULL) + return generic_writepages(mapping, wbc); + + /* * BB: Is this meaningful for a non-block-device file system? * If it is, we should test it again after we do I/O */ if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; + kfree(iov); return 0; } @@ -1345,7 +1351,7 @@ retry: mapping->writeback_index = index; FreeXid(xid); - + kfree(iov); return rc; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index aedf683f011..19cc294c7c7 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -71,9 +71,7 @@ sesInfoAlloc(void) { struct cifsSesInfo *ret_buf; - ret_buf = - (struct cifsSesInfo *) kzalloc(sizeof (struct cifsSesInfo), - GFP_KERNEL); + ret_buf = kzalloc(sizeof (struct cifsSesInfo), GFP_KERNEL); if (ret_buf) { write_lock(&GlobalSMBSeslock); atomic_inc(&sesInfoAllocCount); @@ -109,9 +107,7 @@ struct cifsTconInfo * tconInfoAlloc(void) { struct cifsTconInfo *ret_buf; - ret_buf = - (struct cifsTconInfo *) kzalloc(sizeof (struct cifsTconInfo), - GFP_KERNEL); + ret_buf = kzalloc(sizeof (struct cifsTconInfo), GFP_KERNEL); if (ret_buf) { write_lock(&GlobalSMBSeslock); atomic_inc(&tconInfoAllocCount); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 99dfb5337e3..782940be550 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -156,9 +156,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_atime = cnvrtDosUnixTm( le16_to_cpu(pfindData->LastAccessDate), le16_to_cpu(pfindData->LastAccessTime)); - tmp_inode->i_ctime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastWriteDate), - le16_to_cpu(pfindData->LastWriteTime)); + tmp_inode->i_ctime = cnvrtDosUnixTm( + le16_to_cpu(pfindData->LastWriteDate), + le16_to_cpu(pfindData->LastWriteTime)); AdjustForTZ(cifs_sb->tcon, tmp_inode); attr = le16_to_cpu(pfindData->Attributes); allocation_size = le32_to_cpu(pfindData->AllocationSize); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index bbdda99dce6..75846463089 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -182,11 +182,14 @@ static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInf cFYI(1,("bleft %d",bleft)); - /* word align, if bytes remaining is not even */ - if(bleft % 2) { - bleft--; - data++; - } + /* SMB header is unaligned, so cifs servers word align start of + Unicode strings */ + data++; + bleft--; /* Windows servers do not always double null terminate + their final Unicode string - in which case we + now will not attempt to decode the byte of junk + which follows it */ + words_left = bleft / 2; /* save off server operating system */ diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 7a1b2b961ec..1b1daf63f06 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c @@ -196,7 +196,7 @@ dohash(char *out, char *in, char *key, int forw) char c[28]; char d[28]; char *cd; - char ki[16][48]; + char (*ki)[48]; char *pd1; char l[32], r[32]; char *rl; @@ -206,6 +206,12 @@ dohash(char *out, char *in, char *key, int forw) if(pk1 == NULL) return; + ki = kmalloc(16*48, GFP_KERNEL); + if(ki == NULL) { + kfree(pk1); + return; + } + cd = pk1 + 56; pd1= cd + 56; rl = pd1 + 64; @@ -243,6 +249,7 @@ dohash(char *out, char *in, char *key, int forw) er = kmalloc(48+48+32+32+32, GFP_KERNEL); if(er == NULL) { kfree(pk1); + kfree(ki); return; } erk = er+48; @@ -290,6 +297,7 @@ dohash(char *out, char *in, char *key, int forw) permute(out, rl, perm6, 64); kfree(pk1); + kfree(ki); } static void diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index b5654a284fe..6fa7b0d5c04 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -3,21 +3,21 @@ menu "Distributed Lock Manager" config DLM tristate "Distributed Lock Manager (DLM)" - depends on IPV6 || IPV6=n + depends on SYSFS && (IPV6 || IPV6=n) select CONFIGFS_FS select IP_SCTP if DLM_SCTP help - A general purpose distributed lock manager for kernel or userspace - applications. + A general purpose distributed lock manager for kernel or userspace + applications. choice prompt "Select DLM communications protocol" depends on DLM default DLM_TCP help - The DLM Can use TCP or SCTP for it's network communications. - SCTP supports multi-homed operations whereas TCP doesn't. - However, SCTP seems to have stability problems at the moment. + The DLM Can use TCP or SCTP for it's network communications. + SCTP supports multi-homed operations whereas TCP doesn't. + However, SCTP seems to have stability problems at the moment. config DLM_TCP bool "TCP/IP" @@ -31,8 +31,8 @@ config DLM_DEBUG bool "DLM debugging" depends on DLM help - Under the debugfs mount point, the name of each lockspace will - appear as a file in the "dlm" directory. The output is the - list of resource and locks the local node knows about. + Under the debugfs mount point, the name of each lockspace will + appear as a file in the "dlm" directory. The output is the + list of resource and locks the local node knows about. endmenu diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 88553054bbf..8665c88e5af 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); +static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, + char *buf); +static ssize_t store_cluster(struct config_item *i, + struct configfs_attribute *a, + const char *buf, size_t len); static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, char *buf); static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, @@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); static ssize_t node_weight_read(struct node *nd, char *buf); static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); +struct cluster { + struct config_group group; + unsigned int cl_tcp_port; + unsigned int cl_buffer_size; + unsigned int cl_rsbtbl_size; + unsigned int cl_lkbtbl_size; + unsigned int cl_dirtbl_size; + unsigned int cl_recover_timer; + unsigned int cl_toss_secs; + unsigned int cl_scan_secs; + unsigned int cl_log_debug; +}; + +enum { + CLUSTER_ATTR_TCP_PORT = 0, + CLUSTER_ATTR_BUFFER_SIZE, + CLUSTER_ATTR_RSBTBL_SIZE, + CLUSTER_ATTR_LKBTBL_SIZE, + CLUSTER_ATTR_DIRTBL_SIZE, + CLUSTER_ATTR_RECOVER_TIMER, + CLUSTER_ATTR_TOSS_SECS, + CLUSTER_ATTR_SCAN_SECS, + CLUSTER_ATTR_LOG_DEBUG, +}; + +struct cluster_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct cluster *, char *); + ssize_t (*store)(struct cluster *, const char *, size_t); +}; + +static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, + unsigned int *info_field, int check_zero, + const char *buf, size_t len) +{ + unsigned int x; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + x = simple_strtoul(buf, NULL, 0); + + if (check_zero && !x) + return -EINVAL; + + *cl_field = x; + *info_field = x; + + return len; +} + +#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ + .attr = { .ca_name = __stringify(_name), \ + .ca_mode = _mode, \ + .ca_owner = THIS_MODULE }, \ + .show = _read, \ + .store = _write, \ +} + +#define CLUSTER_ATTR(name, check_zero) \ +static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ +{ \ + return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ + check_zero, buf, len); \ +} \ +static ssize_t name##_read(struct cluster *cl, char *buf) \ +{ \ + return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ +} \ +static struct cluster_attribute cluster_attr_##name = \ +__CONFIGFS_ATTR(name, 0644, name##_read, name##_write) + +CLUSTER_ATTR(tcp_port, 1); +CLUSTER_ATTR(buffer_size, 1); +CLUSTER_ATTR(rsbtbl_size, 1); +CLUSTER_ATTR(lkbtbl_size, 1); +CLUSTER_ATTR(dirtbl_size, 1); +CLUSTER_ATTR(recover_timer, 1); +CLUSTER_ATTR(toss_secs, 1); +CLUSTER_ATTR(scan_secs, 1); +CLUSTER_ATTR(log_debug, 0); + +static struct configfs_attribute *cluster_attrs[] = { + [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, + [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, + [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, + [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr, + [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, + [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, + [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, + [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, + [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, + NULL, +}; + enum { COMM_ATTR_NODEID = 0, COMM_ATTR_LOCAL, @@ -152,10 +252,6 @@ struct clusters { struct configfs_subsystem subsys; }; -struct cluster { - struct config_group group; -}; - struct spaces { struct config_group ss_group; }; @@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = { static struct configfs_item_operations cluster_ops = { .release = release_cluster, + .show_attribute = show_cluster, + .store_attribute = store_cluster, }; static struct configfs_group_operations spaces_ops = { @@ -237,6 +335,7 @@ static struct config_item_type clusters_type = { static struct config_item_type cluster_type = { .ct_item_ops = &cluster_ops, + .ct_attrs = cluster_attrs, .ct_owner = THIS_MODULE, }; @@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g, cl->group.default_groups[1] = &cms->cs_group; cl->group.default_groups[2] = NULL; + cl->cl_tcp_port = dlm_config.ci_tcp_port; + cl->cl_buffer_size = dlm_config.ci_buffer_size; + cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; + cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size; + cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; + cl->cl_recover_timer = dlm_config.ci_recover_timer; + cl->cl_toss_secs = dlm_config.ci_toss_secs; + cl->cl_scan_secs = dlm_config.ci_scan_secs; + cl->cl_log_debug = dlm_config.ci_log_debug; + space_list = &sps->ss_group; comm_list = &cms->cs_group; return &cl->group; @@ -509,6 +618,25 @@ void dlm_config_exit(void) * Functions for user space to read/write attributes */ +static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, + char *buf) +{ + struct cluster *cl = to_cluster(i); + struct cluster_attribute *cla = + container_of(a, struct cluster_attribute, attr); + return cla->show ? cla->show(cl, buf) : 0; +} + +static ssize_t store_cluster(struct config_item *i, + struct configfs_attribute *a, + const char *buf, size_t len) +{ + struct cluster *cl = to_cluster(i); + struct cluster_attribute *cla = + container_of(a, struct cluster_attribute, attr); + return cla->store ? cla->store(cl, buf, len) : -EINVAL; +} + static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, char *buf) { @@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 #define DEFAULT_SCAN_SECS 5 +#define DEFAULT_LOG_DEBUG 0 struct dlm_config_info dlm_config = { - .tcp_port = DEFAULT_TCP_PORT, - .buffer_size = DEFAULT_BUFFER_SIZE, - .rsbtbl_size = DEFAULT_RSBTBL_SIZE, - .lkbtbl_size = DEFAULT_LKBTBL_SIZE, - .dirtbl_size = DEFAULT_DIRTBL_SIZE, - .recover_timer = DEFAULT_RECOVER_TIMER, - .toss_secs = DEFAULT_TOSS_SECS, - .scan_secs = DEFAULT_SCAN_SECS + .ci_tcp_port = DEFAULT_TCP_PORT, + .ci_buffer_size = DEFAULT_BUFFER_SIZE, + .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, + .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE, + .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, + .ci_recover_timer = DEFAULT_RECOVER_TIMER, + .ci_toss_secs = DEFAULT_TOSS_SECS, + .ci_scan_secs = DEFAULT_SCAN_SECS, + .ci_log_debug = DEFAULT_LOG_DEBUG }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 9da7839958a..1e978611a96 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -17,14 +17,15 @@ #define DLM_MAX_ADDR_COUNT 3 struct dlm_config_info { - int tcp_port; - int buffer_size; - int rsbtbl_size; - int lkbtbl_size; - int dirtbl_size; - int recover_timer; - int toss_secs; - int scan_secs; + int ci_tcp_port; + int ci_buffer_size; + int ci_rsbtbl_size; + int ci_lkbtbl_size; + int ci_dirtbl_size; + int ci_recover_timer; + int ci_toss_secs; + int ci_scan_secs; + int ci_log_debug; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 1ee8195e6fc..61d93201e1b 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -41,6 +41,7 @@ #include <asm/uaccess.h> #include <linux/dlm.h> +#include "config.h" #define DLM_LOCKSPACE_LEN 64 @@ -69,12 +70,12 @@ struct dlm_mhandle; #define log_error(ls, fmt, args...) \ printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) -#define DLM_LOG_DEBUG -#ifdef DLM_LOG_DEBUG -#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) -#else -#define log_debug(ls, fmt, args...) -#endif +#define log_debug(ls, fmt, args...) \ +do { \ + if (dlm_config.ci_log_debug) \ + printk(KERN_DEBUG "dlm: %s: " fmt "\n", \ + (ls)->ls_name , ##args); \ +} while (0) #define DLM_ASSERT(x, do) \ { \ @@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) /* dlm_header is first element of all structs sent between nodes */ -#define DLM_HEADER_MAJOR 0x00020000 -#define DLM_HEADER_MINOR 0x00000001 +#define DLM_HEADER_MAJOR 0x00030000 +#define DLM_HEADER_MINOR 0x00000000 #define DLM_MSG 1 #define DLM_RCOM 2 @@ -386,6 +387,8 @@ struct dlm_rcom { uint32_t rc_type; /* DLM_RCOM_ */ int rc_result; /* multi-purpose */ uint64_t rc_id; /* match reply with request */ + uint64_t rc_seq; /* sender's ls_recover_seq */ + uint64_t rc_seq_reply; /* remote ls_recover_seq */ char rc_buf[0]; }; @@ -523,6 +526,7 @@ struct dlm_user_proc { spinlock_t asts_spin; struct list_head locks; spinlock_t locks_spin; + struct list_head unlocking; wait_queue_head_t wait; }; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 30878defaeb..e725005fafd 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype) mutex_unlock(&ls->ls_waiters_mutex); } +/* We clear the RESEND flag because we might be taking an lkb off the waiters + list as part of process_requestqueue (e.g. a lookup that has an optimized + request reply on the requestqueue) between dlm_recover_waiters_pre() which + set RESEND and dlm_recover_waiters_post() */ + static int _remove_from_waiters(struct dlm_lkb *lkb) { int error = 0; @@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb) goto out; } lkb->lkb_wait_type = 0; + lkb->lkb_flags &= ~DLM_IFL_RESEND; list_del(&lkb->lkb_wait_reply); unhold_lkb(lkb); out: @@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, res_hashchain) { if (!time_after_eq(jiffies, r->res_toss_time + - dlm_config.toss_secs * HZ)) + dlm_config.ci_toss_secs * HZ)) continue; found = 1; break; @@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, if (lkb->lkb_astaddr) ms->m_asts |= AST_COMP; - if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) - memcpy(ms->m_extra, r->res_name, r->res_length); + /* compare with switch in create_message; send_remove() doesn't + use send_args() */ - else if (lkb->lkb_lvbptr) + switch (ms->m_type) { + case DLM_MSG_REQUEST: + case DLM_MSG_LOOKUP: + memcpy(ms->m_extra, r->res_name, r->res_length); + break; + case DLM_MSG_CONVERT: + case DLM_MSG_UNLOCK: + case DLM_MSG_REQUEST_REPLY: + case DLM_MSG_CONVERT_REPLY: + case DLM_MSG_GRANT: + if (!lkb->lkb_lvbptr) + break; memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); - + break; + } } static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) @@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); - if (receive_lvb(ls, lkb, ms)) - return -ENOMEM; + if (lkb->lkb_exflags & DLM_LKF_VALBLK) { + /* lkb was just created so there won't be an lvb yet */ + lkb->lkb_lvbptr = allocate_lvb(ls); + if (!lkb->lkb_lvbptr) + return -ENOMEM; + } return 0; } @@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) { struct dlm_message *ms = (struct dlm_message *) hd; struct dlm_ls *ls; - int error; + int error = 0; if (!recovery) dlm_message_in(ms); @@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) out: dlm_put_lockspace(ls); dlm_astd_wake(); - return 0; + return error; } @@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) if (middle_conversion(lkb)) { hold_lkb(lkb); ls->ls_stub_ms.m_result = -EINPROGRESS; + ls->ls_stub_ms.m_flags = lkb->lkb_flags; _remove_from_waiters(lkb); _receive_convert_reply(lkb, &ls->ls_stub_ms); @@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_UNLOCK: hold_lkb(lkb); ls->ls_stub_ms.m_result = -DLM_EUNLOCK; + ls->ls_stub_ms.m_flags = lkb->lkb_flags; _remove_from_waiters(lkb); _receive_unlock_reply(lkb, &ls->ls_stub_ms); dlm_put_lkb(lkb); @@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_CANCEL: hold_lkb(lkb); ls->ls_stub_ms.m_result = -DLM_ECANCEL; + ls->ls_stub_ms.m_flags = lkb->lkb_flags; _remove_from_waiters(lkb); _receive_cancel_reply(lkb, &ls->ls_stub_ms); dlm_put_lkb(lkb); @@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) lock_rsb(r); switch (error) { + case -EBADR: + /* There's a chance the new master received our lock before + dlm_recover_master_reply(), this wouldn't happen if we did + a barrier between recover_masters and recover_locks. */ + log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id, + (unsigned long)r, r->res_name); + dlm_send_rcom_lock(r, lkb); + goto out; case -EEXIST: log_debug(ls, "master copy exists %x", lkb->lkb_id); /* fall through */ @@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) /* an ack for dlm_recover_locks() which waits for replies from all the locks it sends to new masters */ dlm_recovered_lock(r); - + out: unlock_rsb(r); put_rsb(r); dlm_put_lkb(lkb); @@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, } if (flags & DLM_LKF_VALBLK) { - ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); + ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); if (!ua->lksb.sb_lvbptr) { kfree(ua); __put_lkb(ls, lkb); @@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua = (struct dlm_user_args *)lkb->lkb_astparam; if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { - ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); + ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); if (!ua->lksb.sb_lvbptr) { error = -ENOMEM; goto out_put; @@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - list_del_init(&lkb->lkb_ownqueue); + /* dlm_user_add_ast() may have already taken lkb off the proc list */ + if (!list_empty(&lkb->lkb_ownqueue)) + list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); - - /* this removes the reference for the proc->locks list added by - dlm_user_request */ - unhold_lkb(lkb); out_put: dlm_put_lkb(lkb); out: @@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, /* this lkb was removed from the WAITING queue */ if (lkb->lkb_grmode == DLM_LOCK_IV) { spin_lock(&ua->proc->locks_spin); - list_del_init(&lkb->lkb_ownqueue); + list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); - unhold_lkb(lkb); } out_put: dlm_put_lkb(lkb); @@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) mutex_lock(&ls->ls_clear_proc_locks); list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { - if (lkb->lkb_ast_type) { - list_del(&lkb->lkb_astqueue); - unhold_lkb(lkb); - } - list_del_init(&lkb->lkb_ownqueue); if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { @@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } + + /* in-progress unlocks */ + list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { + list_del_init(&lkb->lkb_ownqueue); + lkb->lkb_flags |= DLM_IFL_DEAD; + dlm_put_lkb(lkb); + } + + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { + list_del(&lkb->lkb_astqueue); + dlm_put_lkb(lkb); + } + mutex_unlock(&ls->ls_clear_proc_locks); unlock_recovery(ls); } + diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 59012b089e8..f40817b53c6 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -236,7 +236,7 @@ static int dlm_scand(void *data) while (!kthread_should_stop()) { list_for_each_entry(ls, &lslist, ls_list) dlm_scan_rsbs(ls); - schedule_timeout_interruptible(dlm_config.scan_secs * HZ); + schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); } return 0; } @@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_count = 0; ls->ls_flags = 0; - size = dlm_config.rsbtbl_size; + size = dlm_config.ci_rsbtbl_size; ls->ls_rsbtbl_size = size; ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); @@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, rwlock_init(&ls->ls_rsbtbl[i].lock); } - size = dlm_config.lkbtbl_size; + size = dlm_config.ci_lkbtbl_size; ls->ls_lkbtbl_size = size; ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); @@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_lkbtbl[i].counter = 1; } - size = dlm_config.dirtbl_size; + size = dlm_config.ci_dirtbl_size; ls->ls_dirtbl_size = size; ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); @@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, mutex_init(&ls->ls_requestqueue_mutex); mutex_init(&ls->ls_clear_proc_locks); - ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); + ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); if (!ls->ls_recover_buf) goto out_dirfree; diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c index fe158d7a928..dc83a9d979b 100644 --- a/fs/dlm/lowcomms-sctp.c +++ b/fs/dlm/lowcomms-sctp.c @@ -72,6 +72,8 @@ struct nodeinfo { struct list_head writequeue; /* outgoing writequeue_entries */ spinlock_t writequeue_lock; int nodeid; + struct work_struct swork; /* Send workqueue */ + struct work_struct lwork; /* Locking workqueue */ }; static DEFINE_IDR(nodeinfo_idr); @@ -96,6 +98,7 @@ struct connection { atomic_t waiting_requests; struct cbuf cb; int eagain_flag; + struct work_struct work; /* Send workqueue */ }; /* An entry waiting to be sent */ @@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n) static LIST_HEAD(write_nodes); static DEFINE_SPINLOCK(write_nodes_lock); + /* Maximum number of incoming messages to process before * doing a schedule() */ #define MAX_RX_MSG_COUNT 25 -/* Manage daemons */ -static struct task_struct *recv_task; -static struct task_struct *send_task; -static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait); +/* Work queues */ +static struct workqueue_struct *recv_workqueue; +static struct workqueue_struct *send_workqueue; +static struct workqueue_struct *lock_workqueue; /* The SCTP connection */ static struct connection sctp_con; +static void process_send_sockets(struct work_struct *work); +static void process_recv_sockets(struct work_struct *work); +static void process_lock_request(struct work_struct *work); static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) { @@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) spin_lock_init(&ni->lock); INIT_LIST_HEAD(&ni->writequeue); spin_lock_init(&ni->writequeue_lock); + INIT_WORK(&ni->lwork, process_lock_request); + INIT_WORK(&ni->swork, process_send_sockets); ni->nodeid = nodeid; if (nodeid > max_nodeid) @@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc) /* Data or notification available on socket */ static void lowcomms_data_ready(struct sock *sk, int count_unused) { - atomic_inc(&sctp_con.waiting_requests); if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) - return; - - wake_up_interruptible(&lowcomms_recv_wait); + queue_work(recv_workqueue, &sctp_con.work); } @@ -361,10 +367,10 @@ static void init_failed(void) spin_lock_bh(&write_nodes_lock); list_add_tail(&ni->write_list, &write_nodes); spin_unlock_bh(&write_nodes_lock); + queue_work(send_workqueue, &ni->swork); } } } - wake_up_process(send_task); } /* Something happened to an association */ @@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf) spin_lock_bh(&write_nodes_lock); list_add_tail(&ni->write_list, &write_nodes); spin_unlock_bh(&write_nodes_lock); + queue_work(send_workqueue, &ni->swork); } - wake_up_process(send_task); } break; @@ -580,8 +586,8 @@ static int receive_from_sock(void) spin_lock_bh(&write_nodes_lock); list_add_tail(&ni->write_list, &write_nodes); spin_unlock_bh(&write_nodes_lock); + queue_work(send_workqueue, &ni->swork); } - wake_up_process(send_task); } } @@ -590,6 +596,7 @@ static int receive_from_sock(void) return 0; cbuf_add(&sctp_con.cb, ret); + // PJC: TODO: Add to node's workqueue....can we ?? ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), page_address(sctp_con.rx_page), sctp_con.cb.base, sctp_con.cb.len, @@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num) if (result < 0) log_print("Can't bind to port %d addr number %d", - dlm_config.tcp_port, num); + dlm_config.ci_tcp_port, num); return result; } @@ -711,7 +718,7 @@ static int init_sock(void) /* Bind to all interfaces. */ for (i = 0; i < dlm_local_count; i++) { memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); - make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len); + make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len); result = add_bind_addr(&localaddr, addr_len, num); if (result) @@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg) spin_lock_bh(&write_nodes_lock); list_add_tail(&ni->write_list, &write_nodes); spin_unlock_bh(&write_nodes_lock); - wake_up_process(send_task); + + queue_work(send_workqueue, &ni->swork); } return; @@ -863,7 +871,7 @@ static void initiate_association(int nodeid) return; } - make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); + make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen); outmessage.msg_name = &rem_addr; outmessage.msg_namelen = addrlen; @@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid) return 0; } -static int write_list_empty(void) +// PJC: The work queue function for receiving. +static void process_recv_sockets(struct work_struct *work) { - int status; - - spin_lock_bh(&write_nodes_lock); - status = list_empty(&write_nodes); - spin_unlock_bh(&write_nodes_lock); - - return status; -} - -static int dlm_recvd(void *data) -{ - DECLARE_WAITQUEUE(wait, current); - - while (!kthread_should_stop()) { + if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { + int ret; int count = 0; - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&lowcomms_recv_wait, &wait); - if (!test_bit(CF_READ_PENDING, &sctp_con.flags)) - cond_resched(); - remove_wait_queue(&lowcomms_recv_wait, &wait); - set_current_state(TASK_RUNNING); - - if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { - int ret; - - do { - ret = receive_from_sock(); + do { + ret = receive_from_sock(); - /* Don't starve out everyone else */ - if (++count >= MAX_RX_MSG_COUNT) { - cond_resched(); - count = 0; - } - } while (!kthread_should_stop() && ret >=0); - } - cond_resched(); + /* Don't starve out everyone else */ + if (++count >= MAX_RX_MSG_COUNT) { + cond_resched(); + count = 0; + } + } while (!kthread_should_stop() && ret >=0); } - - return 0; + cond_resched(); } -static int dlm_sendd(void *data) +// PJC: the work queue function for sending +static void process_send_sockets(struct work_struct *work) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - if (write_list_empty()) - cond_resched(); - set_current_state(TASK_RUNNING); - - if (sctp_con.eagain_flag) { - sctp_con.eagain_flag = 0; - refill_write_queue(); - } - process_output_queue(); + if (sctp_con.eagain_flag) { + sctp_con.eagain_flag = 0; + refill_write_queue(); } + process_output_queue(); +} - remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); - - return 0; +// PJC: Process lock requests from a particular node. +// TODO: can we optimise this out on UP ?? +static void process_lock_request(struct work_struct *work) +{ } static void daemons_stop(void) { - kthread_stop(recv_task); - kthread_stop(send_task); + destroy_workqueue(recv_workqueue); + destroy_workqueue(send_workqueue); + destroy_workqueue(lock_workqueue); } static int daemons_start(void) { - struct task_struct *p; int error; + recv_workqueue = create_workqueue("dlm_recv"); + error = IS_ERR(recv_workqueue); + if (error) { + log_print("can't start dlm_recv %d", error); + return error; + } - p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); - error = IS_ERR(p); + send_workqueue = create_singlethread_workqueue("dlm_send"); + error = IS_ERR(send_workqueue); if (error) { - log_print("can't start dlm_recvd %d", error); + log_print("can't start dlm_send %d", error); + destroy_workqueue(recv_workqueue); return error; } - recv_task = p; - p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); - error = IS_ERR(p); + lock_workqueue = create_workqueue("dlm_rlock"); + error = IS_ERR(lock_workqueue); if (error) { - log_print("can't start dlm_sendd %d", error); - kthread_stop(recv_task); + log_print("can't start dlm_rlock %d", error); + destroy_workqueue(send_workqueue); + destroy_workqueue(recv_workqueue); return error; } - send_task = p; return 0; } @@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void) { int error; + INIT_WORK(&sctp_con.work, process_recv_sockets); + error = init_sock(); if (error) goto fail_sock; @@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void) for (i = 0; i < dlm_local_count; i++) kfree(dlm_local_addr[i]); } - diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c index 9be3a440c42..f1efd17b261 100644 --- a/fs/dlm/lowcomms-tcp.c +++ b/fs/dlm/lowcomms-tcp.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb) struct connection { struct socket *sock; /* NULL if not connected */ uint32_t nodeid; /* So we know who we are in the list */ - struct rw_semaphore sock_sem; /* Stop connect races */ - struct list_head read_list; /* On this list when ready for reading */ - struct list_head write_list; /* On this list when ready for writing */ - struct list_head state_list; /* On this list when ready to connect */ + struct mutex sock_mutex; unsigned long flags; /* bit 1,2 = We are on the read/write lists */ #define CF_READ_PENDING 1 #define CF_WRITE_PENDING 2 @@ -112,9 +109,10 @@ struct connection { struct page *rx_page; struct cbuf cb; int retries; - atomic_t waiting_requests; #define MAX_CONNECT_RETRIES 3 struct connection *othercon; + struct work_struct rwork; /* Receive workqueue */ + struct work_struct swork; /* Send workqueue */ }; #define sock2con(x) ((struct connection *)(x)->sk_user_data) @@ -131,14 +129,9 @@ struct writequeue_entry { static struct sockaddr_storage dlm_local_addr; -/* Manage daemons */ -static struct task_struct *recv_task; -static struct task_struct *send_task; - -static wait_queue_t lowcomms_send_waitq_head; -static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq); -static wait_queue_t lowcomms_recv_waitq_head; -static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq); +/* Work queues */ +static struct workqueue_struct *recv_workqueue; +static struct workqueue_struct *send_workqueue; /* An array of pointers to connections, indexed by NODEID */ static struct connection **connections; @@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock); static struct kmem_cache *con_cache; static int conn_array_size; -/* List of sockets that have reads pending */ -static LIST_HEAD(read_sockets); -static DEFINE_SPINLOCK(read_sockets_lock); - -/* List of sockets which have writes pending */ -static LIST_HEAD(write_sockets); -static DEFINE_SPINLOCK(write_sockets_lock); - -/* List of sockets which have connects pending */ -static LIST_HEAD(state_sockets); -static DEFINE_SPINLOCK(state_sockets_lock); +static void process_recv_sockets(struct work_struct *work); +static void process_send_sockets(struct work_struct *work); static struct connection *nodeid2con(int nodeid, gfp_t allocation) { @@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) goto finish; con->nodeid = nodeid; - init_rwsem(&con->sock_sem); + mutex_init(&con->sock_mutex); INIT_LIST_HEAD(&con->writequeue); spin_lock_init(&con->writequeue_lock); + INIT_WORK(&con->swork, process_send_sockets); + INIT_WORK(&con->rwork, process_recv_sockets); connections[nodeid] = con; } @@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused) { struct connection *con = sock2con(sk); - atomic_inc(&con->waiting_requests); - if (test_and_set_bit(CF_READ_PENDING, &con->flags)) - return; - - spin_lock_bh(&read_sockets_lock); - list_add_tail(&con->read_list, &read_sockets); - spin_unlock_bh(&read_sockets_lock); - - wake_up_interruptible(&lowcomms_recv_waitq); + if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) + queue_work(recv_workqueue, &con->rwork); } static void lowcomms_write_space(struct sock *sk) { struct connection *con = sock2con(sk); - if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) - return; - - spin_lock_bh(&write_sockets_lock); - list_add_tail(&con->write_list, &write_sockets); - spin_unlock_bh(&write_sockets_lock); - - wake_up_interruptible(&lowcomms_send_waitq); + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + queue_work(send_workqueue, &con->swork); } static inline void lowcomms_connect_sock(struct connection *con) { - if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) - return; - - spin_lock_bh(&state_sockets_lock); - list_add_tail(&con->state_list, &state_sockets); - spin_unlock_bh(&state_sockets_lock); - - wake_up_interruptible(&lowcomms_send_waitq); + if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) + queue_work(send_workqueue, &con->swork); } static void lowcomms_state_change(struct sock *sk) @@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, /* Close a remote connection and tidy up */ static void close_connection(struct connection *con, bool and_other) { - down_write(&con->sock_sem); + mutex_lock(&con->sock_mutex); if (con->sock) { sock_release(con->sock); @@ -294,7 +261,7 @@ static void close_connection(struct connection *con, bool and_other) con->rx_page = NULL; } con->retries = 0; - up_write(&con->sock_sem); + mutex_unlock(&con->sock_mutex); } /* Data received from remote end */ @@ -308,10 +275,13 @@ static int receive_from_sock(struct connection *con) int r; int call_again_soon = 0; - down_read(&con->sock_sem); + mutex_lock(&con->sock_mutex); + + if (con->sock == NULL) { + ret = -EAGAIN; + goto out_close; + } - if (con->sock == NULL) - goto out; if (con->rx_page == NULL) { /* * This doesn't need to be atomic, but I think it should @@ -359,6 +329,9 @@ static int receive_from_sock(struct connection *con) if (ret <= 0) goto out_close; + if (ret == -EAGAIN) + goto out_resched; + if (ret == len) call_again_soon = 1; cbuf_add(&con->cb, ret); @@ -381,24 +354,26 @@ static int receive_from_sock(struct connection *con) con->rx_page = NULL; } -out: if (call_again_soon) goto out_resched; - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); return 0; out_resched: - lowcomms_data_ready(con->sock->sk, 0); - up_read(&con->sock_sem); - cond_resched(); - return 0; + if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) + queue_work(recv_workqueue, &con->rwork); + mutex_unlock(&con->sock_mutex); + return -EAGAIN; out_close: - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { close_connection(con, false); /* Reconnect when there is something to send */ } + /* Don't return success if we really got EOF */ + if (ret == 0) + ret = -EAGAIN; return ret; } @@ -412,6 +387,7 @@ static int accept_from_sock(struct connection *con) int len; int nodeid; struct connection *newcon; + struct connection *addcon; memset(&peeraddr, 0, sizeof(peeraddr)); result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, @@ -419,7 +395,7 @@ static int accept_from_sock(struct connection *con) if (result < 0) return -ENOMEM; - down_read(&con->sock_sem); + mutex_lock_nested(&con->sock_mutex, 0); result = -ENOTCONN; if (con->sock == NULL) @@ -445,7 +421,7 @@ static int accept_from_sock(struct connection *con) if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { printk("dlm: connect from non cluster node\n"); sock_release(newsock); - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); return -1; } @@ -462,7 +438,7 @@ static int accept_from_sock(struct connection *con) result = -ENOMEM; goto accept_err; } - down_write(&newcon->sock_sem); + mutex_lock_nested(&newcon->sock_mutex, 1); if (newcon->sock) { struct connection *othercon = newcon->othercon; @@ -470,41 +446,45 @@ static int accept_from_sock(struct connection *con) othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); if (!othercon) { printk("dlm: failed to allocate incoming socket\n"); - up_write(&newcon->sock_sem); + mutex_unlock(&newcon->sock_mutex); result = -ENOMEM; goto accept_err; } othercon->nodeid = nodeid; othercon->rx_action = receive_from_sock; - init_rwsem(&othercon->sock_sem); + mutex_init(&othercon->sock_mutex); + INIT_WORK(&othercon->swork, process_send_sockets); + INIT_WORK(&othercon->rwork, process_recv_sockets); set_bit(CF_IS_OTHERCON, &othercon->flags); newcon->othercon = othercon; } othercon->sock = newsock; newsock->sk->sk_user_data = othercon; add_sock(newsock, othercon); + addcon = othercon; } else { newsock->sk->sk_user_data = newcon; newcon->rx_action = receive_from_sock; add_sock(newsock, newcon); - + addcon = newcon; } - up_write(&newcon->sock_sem); + mutex_unlock(&newcon->sock_mutex); /* * Add it to the active queue in case we got data * beween processing the accept adding the socket * to the read_sockets list */ - lowcomms_data_ready(newsock->sk, 0); - up_read(&con->sock_sem); + if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) + queue_work(recv_workqueue, &addcon->rwork); + mutex_unlock(&con->sock_mutex); return 0; accept_err: - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); sock_release(newsock); if (result != -EAGAIN) @@ -525,7 +505,7 @@ static void connect_to_sock(struct connection *con) return; } - down_write(&con->sock_sem); + mutex_lock(&con->sock_mutex); if (con->retries++ > MAX_CONNECT_RETRIES) goto out; @@ -548,7 +528,7 @@ static void connect_to_sock(struct connection *con) sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; - make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); + make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); add_sock(sock, con); @@ -577,7 +557,7 @@ out_err: result = 0; } out: - up_write(&con->sock_sem); + mutex_unlock(&con->sock_mutex); return; } @@ -616,10 +596,10 @@ static struct socket *create_listen_sock(struct connection *con, con->sock = sock; /* Bind to our port */ - make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); + make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); if (result < 0) { - printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); + printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port); sock_release(sock); sock = NULL; con->sock = NULL; @@ -638,7 +618,7 @@ static struct socket *create_listen_sock(struct connection *con, result = sock->ops->listen(sock, 5); if (result < 0) { - printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); + printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port); sock_release(sock); sock = NULL; goto create_out; @@ -709,6 +689,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, if (!con) return NULL; + spin_lock(&con->writequeue_lock); e = list_entry(con->writequeue.prev, struct writequeue_entry, list); if ((&e->list == &con->writequeue) || (PAGE_CACHE_SIZE - e->end < len)) { @@ -747,6 +728,7 @@ void dlm_lowcomms_commit_buffer(void *mh) struct connection *con = e->con; int users; + spin_lock(&con->writequeue_lock); users = --e->users; if (users) goto out; @@ -754,12 +736,8 @@ void dlm_lowcomms_commit_buffer(void *mh) kunmap(e->page); spin_unlock(&con->writequeue_lock); - if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { - spin_lock_bh(&write_sockets_lock); - list_add_tail(&con->write_list, &write_sockets); - spin_unlock_bh(&write_sockets_lock); - - wake_up_interruptible(&lowcomms_send_waitq); + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { + queue_work(send_workqueue, &con->swork); } return; @@ -783,7 +761,7 @@ static void send_to_sock(struct connection *con) struct writequeue_entry *e; int len, offset; - down_read(&con->sock_sem); + mutex_lock(&con->sock_mutex); if (con->sock == NULL) goto out_connect; @@ -800,6 +778,7 @@ static void send_to_sock(struct connection *con) offset = e->offset; BUG_ON(len == 0 && e->users == 0); spin_unlock(&con->writequeue_lock); + kmap(e->page); ret = 0; if (len) { @@ -828,18 +807,18 @@ static void send_to_sock(struct connection *con) } spin_unlock(&con->writequeue_lock); out: - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); return; send_error: - up_read(&con->sock_sem); + mutex_unlock(&con->sock_mutex); close_connection(con, false); lowcomms_connect_sock(con); return; out_connect: - up_read(&con->sock_sem); - lowcomms_connect_sock(con); + mutex_unlock(&con->sock_mutex); + connect_to_sock(con); return; } @@ -872,7 +851,6 @@ int dlm_lowcomms_close(int nodeid) if (con) { clean_one_writequeue(con); close_connection(con, true); - atomic_set(&con->waiting_requests, 0); } return 0; @@ -880,102 +858,29 @@ out: return -1; } -/* API send message call, may queue the request */ -/* N.B. This is the old interface - use the new one for new calls */ -int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation) -{ - struct writequeue_entry *e; - char *b; - - e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b); - if (e) { - memcpy(b, buf, len); - dlm_lowcomms_commit_buffer(e); - return 0; - } - return -ENOBUFS; -} - /* Look for activity on active sockets */ -static void process_sockets(void) +static void process_recv_sockets(struct work_struct *work) { - struct list_head *list; - struct list_head *temp; - int count = 0; - - spin_lock_bh(&read_sockets_lock); - list_for_each_safe(list, temp, &read_sockets) { + struct connection *con = container_of(work, struct connection, rwork); + int err; - struct connection *con = - list_entry(list, struct connection, read_list); - list_del(&con->read_list); - clear_bit(CF_READ_PENDING, &con->flags); - - spin_unlock_bh(&read_sockets_lock); - - /* This can reach zero if we are processing requests - * as they come in. - */ - if (atomic_read(&con->waiting_requests) == 0) { - spin_lock_bh(&read_sockets_lock); - continue; - } - - do { - con->rx_action(con); - - /* Don't starve out everyone else */ - if (++count >= MAX_RX_MSG_COUNT) { - cond_resched(); - count = 0; - } - - } while (!atomic_dec_and_test(&con->waiting_requests) && - !kthread_should_stop()); - - spin_lock_bh(&read_sockets_lock); - } - spin_unlock_bh(&read_sockets_lock); + clear_bit(CF_READ_PENDING, &con->flags); + do { + err = con->rx_action(con); + } while (!err); } -/* Try to send any messages that are pending - */ -static void process_output_queue(void) -{ - struct list_head *list; - struct list_head *temp; - - spin_lock_bh(&write_sockets_lock); - list_for_each_safe(list, temp, &write_sockets) { - struct connection *con = - list_entry(list, struct connection, write_list); - clear_bit(CF_WRITE_PENDING, &con->flags); - list_del(&con->write_list); - - spin_unlock_bh(&write_sockets_lock); - send_to_sock(con); - spin_lock_bh(&write_sockets_lock); - } - spin_unlock_bh(&write_sockets_lock); -} -static void process_state_queue(void) +static void process_send_sockets(struct work_struct *work) { - struct list_head *list; - struct list_head *temp; - - spin_lock_bh(&state_sockets_lock); - list_for_each_safe(list, temp, &state_sockets) { - struct connection *con = - list_entry(list, struct connection, state_list); - list_del(&con->state_list); - clear_bit(CF_CONNECT_PENDING, &con->flags); - spin_unlock_bh(&state_sockets_lock); + struct connection *con = container_of(work, struct connection, swork); + if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { connect_to_sock(con); - spin_lock_bh(&state_sockets_lock); } - spin_unlock_bh(&state_sockets_lock); + + clear_bit(CF_WRITE_PENDING, &con->flags); + send_to_sock(con); } @@ -992,109 +897,33 @@ static void clean_writequeues(void) } } -static int read_list_empty(void) +static void work_stop(void) { - int status; - - spin_lock_bh(&read_sockets_lock); - status = list_empty(&read_sockets); - spin_unlock_bh(&read_sockets_lock); - - return status; -} - -/* DLM Transport comms receive daemon */ -static int dlm_recvd(void *data) -{ - init_waitqueue_entry(&lowcomms_recv_waitq_head, current); - add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - if (read_list_empty()) - cond_resched(); - set_current_state(TASK_RUNNING); - - process_sockets(); - } - - return 0; + destroy_workqueue(recv_workqueue); + destroy_workqueue(send_workqueue); } -static int write_and_state_lists_empty(void) +static int work_start(void) { - int status; - - spin_lock_bh(&write_sockets_lock); - status = list_empty(&write_sockets); - spin_unlock_bh(&write_sockets_lock); - - spin_lock_bh(&state_sockets_lock); - if (list_empty(&state_sockets) == 0) - status = 0; - spin_unlock_bh(&state_sockets_lock); - - return status; -} - -/* DLM Transport send daemon */ -static int dlm_sendd(void *data) -{ - init_waitqueue_entry(&lowcomms_send_waitq_head, current); - add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - if (write_and_state_lists_empty()) - cond_resched(); - set_current_state(TASK_RUNNING); - - process_state_queue(); - process_output_queue(); - } - - return 0; -} - -static void daemons_stop(void) -{ - kthread_stop(recv_task); - kthread_stop(send_task); -} - -static int daemons_start(void) -{ - struct task_struct *p; int error; - - p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); - error = IS_ERR(p); + recv_workqueue = create_workqueue("dlm_recv"); + error = IS_ERR(recv_workqueue); if (error) { - log_print("can't start dlm_recvd %d", error); + log_print("can't start dlm_recv %d", error); return error; } - recv_task = p; - p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); - error = IS_ERR(p); + send_workqueue = create_singlethread_workqueue("dlm_send"); + error = IS_ERR(send_workqueue); if (error) { - log_print("can't start dlm_sendd %d", error); - kthread_stop(recv_task); + log_print("can't start dlm_send %d", error); + destroy_workqueue(recv_workqueue); return error; } - send_task = p; return 0; } -/* - * Return the largest buffer size we can cope with. - */ -int lowcomms_max_buffer_size(void) -{ - return PAGE_CACHE_SIZE; -} - void dlm_lowcomms_stop(void) { int i; @@ -1107,7 +936,7 @@ void dlm_lowcomms_stop(void) connections[i]->flags |= 0xFF; } - daemons_stop(); + work_stop(); clean_writequeues(); for (i = 0; i < conn_array_size; i++) { @@ -1159,7 +988,7 @@ int dlm_lowcomms_start(void) if (error) goto fail_unlisten; - error = daemons_start(); + error = work_start(); if (error) goto fail_unlisten; diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index c9b1c3d535f..a5126e0c68a 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, if (msglen < sizeof(struct dlm_header)) break; err = -E2BIG; - if (msglen > dlm_config.buffer_size) { + if (msglen > dlm_config.ci_buffer_size) { log_print("message size %d from %d too big, buf len %d", msglen, nodeid, len); break; @@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, if (msglen > sizeof(__tmp) && msg == (struct dlm_header *) __tmp) { - msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); + msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); if (msg == NULL) return ret; } diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 4cc31be9cd9..6bfbd615380 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, rc->rc_type = type; + spin_lock(&ls->ls_recover_lock); + rc->rc_seq = ls->ls_recover_seq; + spin_unlock(&ls->ls_recover_lock); + *mh_ret = mh; *rc_ret = rc; return 0; @@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf) rf->rf_lsflags = ls->ls_exflags; } -static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) +static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { + struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; + + if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { + log_error(ls, "version mismatch: %x nodeid %d: %x", + DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, + rc->rc_header.h_version); + return -EINVAL; + } + if (rf->rf_lvblen != ls->ls_lvblen || rf->rf_lsflags != ls->ls_exflags) { log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", @@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) goto out; allow_sync_reply(ls, &rc->rc_id); - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); + memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); send_rcom(ls, mh, rc); @@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) log_debug(ls, "remote node %d not ready", nodeid); rc->rc_result = 0; } else - error = check_config(ls, (struct rcom_config *) rc->rc_buf, - nodeid); + error = check_config(ls, rc, nodeid); /* the caller looks at rc_result for the remote recovery status */ out: return error; @@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) if (error) return; rc->rc_id = rc_in->rc_id; + rc->rc_seq_reply = rc_in->rc_seq; rc->rc_result = dlm_recover_status(ls); make_config(ls, (struct rcom_config *) rc->rc_buf); @@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) if (nodeid == dlm_our_nodeid()) { dlm_copy_master_names(ls, last_name, last_len, ls->ls_recover_buf + len, - dlm_config.buffer_size - len, nodeid); + dlm_config.ci_buffer_size - len, nodeid); goto out; } @@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) memcpy(rc->rc_buf, last_name, last_len); allow_sync_reply(ls, &rc->rc_id); - memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); + memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); send_rcom(ls, mh, rc); @@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct dlm_mhandle *mh; - int error, inlen, outlen; - int nodeid = rc_in->rc_header.h_nodeid; - uint32_t status = dlm_recover_status(ls); - - /* - * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while - * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes). - * It could only happen in rare cases where we get a late NAMES - * message from a previous instance of recovery. - */ - - if (!(status & DLM_RS_NODES)) { - log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid); - return; - } + int error, inlen, outlen, nodeid; nodeid = rc_in->rc_header.h_nodeid; inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); - outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); + outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); if (error) return; rc->rc_id = rc_in->rc_id; + rc->rc_seq_reply = rc_in->rc_seq; dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, nodeid); @@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) ret_nodeid = error; rc->rc_result = ret_nodeid; rc->rc_id = rc_in->rc_id; + rc->rc_seq_reply = rc_in->rc_seq; send_rcom(ls, mh, rc); } @@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); rc->rc_id = rc_in->rc_id; + rc->rc_seq_reply = rc_in->rc_seq; send_rcom(ls, mh, rc); } static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) { - uint32_t status = dlm_recover_status(ls); - - if (!(status & DLM_RS_DIR)) { - log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u", - rc_in->rc_header.h_nodeid); - return; - } - dlm_recover_process_copy(ls, rc_in); } @@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) rc->rc_type = DLM_RCOM_STATUS_REPLY; rc->rc_id = rc_in->rc_id; + rc->rc_seq_reply = rc_in->rc_seq; rc->rc_result = -ESRCH; rf = (struct rcom_config *) rc->rc_buf; @@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) return 0; } +static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) +{ + uint64_t seq; + int rv = 0; + + switch (rc->rc_type) { + case DLM_RCOM_STATUS_REPLY: + case DLM_RCOM_NAMES_REPLY: + case DLM_RCOM_LOOKUP_REPLY: + case DLM_RCOM_LOCK_REPLY: + spin_lock(&ls->ls_recover_lock); + seq = ls->ls_recover_seq; + spin_unlock(&ls->ls_recover_lock); + if (rc->rc_seq_reply != seq) { + log_debug(ls, "ignoring old reply %x from %d " + "seq_reply %llx expect %llx", + rc->rc_type, rc->rc_header.h_nodeid, + (unsigned long long)rc->rc_seq_reply, + (unsigned long long)seq); + rv = 1; + } + } + return rv; +} + /* Called by dlm_recvd; corresponds to dlm_receive_message() but special recovery-only comms are sent through here. */ @@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) } if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { - log_error(ls, "ignoring recovery message %x from %d", + log_debug(ls, "ignoring recovery message %x from %d", rc->rc_type, nodeid); goto out; } + if (is_old_reply(ls, rc)) + goto out; + if (nodeid != rc->rc_header.h_nodeid) { log_error(ls, "bad rcom nodeid %d from %d", rc->rc_header.h_nodeid, nodeid); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index cf9f6831bab..c2cc7694cd1 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -44,7 +44,7 @@ static void dlm_wait_timer_fn(unsigned long data) { struct dlm_ls *ls = (struct dlm_ls *) data; - mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); + mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ)); wake_up(&ls->ls_wait_general); } @@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) init_timer(&ls->ls_timer); ls->ls_timer.function = dlm_wait_timer_fn; ls->ls_timer.data = (long) ls; - ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); + ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ); add_timer(&ls->ls_timer); wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); @@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls) if (dlm_no_directory(ls)) count += recover_master_static(r); - else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { + else if (!is_master(r) && + (dlm_is_removed(ls, r->res_nodeid) || + rsb_flag(r, RSB_NEW_MASTER))) { recover_master(r); count++; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 650536aa513..3cb636d6024 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_members(ls, rv, &neg); if (error) { - log_error(ls, "recover_members failed %d", error); + log_debug(ls, "recover_members failed %d", error); goto fail; } start = jiffies; @@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_directory(ls); if (error) { - log_error(ls, "recover_directory failed %d", error); + log_debug(ls, "recover_directory failed %d", error); goto fail; } @@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_directory_wait(ls); if (error) { - log_error(ls, "recover_directory_wait failed %d", error); + log_debug(ls, "recover_directory_wait failed %d", error); goto fail; } @@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_masters(ls); if (error) { - log_error(ls, "recover_masters failed %d", error); + log_debug(ls, "recover_masters failed %d", error); goto fail; } @@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_locks(ls); if (error) { - log_error(ls, "recover_locks failed %d", error); + log_debug(ls, "recover_locks failed %d", error); goto fail; } error = dlm_recover_locks_wait(ls); if (error) { - log_error(ls, "recover_locks_wait failed %d", error); + log_debug(ls, "recover_locks_wait failed %d", error); goto fail; } @@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_locks_wait(ls); if (error) { - log_error(ls, "recover_locks_wait failed %d", error); + log_debug(ls, "recover_locks_wait failed %d", error); goto fail; } } @@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_set_recover_status(ls, DLM_RS_DONE); error = dlm_recover_done_wait(ls); if (error) { - log_error(ls, "recover_done_wait failed %d", error); + log_debug(ls, "recover_done_wait failed %d", error); goto fail; } @@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = enable_locking(ls, rv->seq); if (error) { - log_error(ls, "enable_locking failed %d", error); + log_debug(ls, "enable_locking failed %d", error); goto fail; } error = dlm_process_requestqueue(ls); if (error) { - log_error(ls, "process_requestqueue failed %d", error); + log_debug(ls, "process_requestqueue failed %d", error); goto fail; } error = dlm_recover_waiters_post(ls); if (error) { - log_error(ls, "recover_waiters_post failed %d", error); + log_debug(ls, "recover_waiters_post failed %d", error); goto fail; } diff --git a/fs/dlm/user.c b/fs/dlm/user.c index c37e93e4f2d..d378b7fe2a1 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) remove_ownqueue = 1; + /* unlocks or cancels of waiting requests need to be removed from the + proc's unlocking list, again there must be a better way... */ + + if (ua->lksb.sb_status == -DLM_EUNLOCK || + (ua->lksb.sb_status == -DLM_ECANCEL && + lkb->lkb_grmode == DLM_LOCK_IV)) + remove_ownqueue = 1; + /* We want to copy the lvb to userspace when the completion ast is read if the status is 0, the lock has an lvb and lvb_ops says we should. We could probably have set_lvb_lock() @@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file) proc->lockspace = ls->ls_local_handle; INIT_LIST_HEAD(&proc->asts); INIT_LIST_HEAD(&proc->locks); + INIT_LIST_HEAD(&proc->unlocking); spin_lock_init(&proc->asts_spin); spin_lock_init(&proc->locks_spin); init_waitqueue_head(&proc->wait); diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 767197db994..963889cf674 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc) rc->rc_type = cpu_to_le32(rc->rc_type); rc->rc_result = cpu_to_le32(rc->rc_result); rc->rc_id = cpu_to_le64(rc->rc_id); + rc->rc_seq = cpu_to_le64(rc->rc_seq); + rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); if (type == DLM_RCOM_LOCK) rcom_lock_out((struct rcom_lock *) rc->rc_buf); @@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc) rc->rc_type = le32_to_cpu(rc->rc_type); rc->rc_result = le32_to_cpu(rc->rc_result); rc->rc_id = le64_to_cpu(rc->rc_id); + rc->rc_seq = le64_to_cpu(rc->rc_seq); + rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); if (rc->rc_type == DLM_RCOM_LOCK) rcom_lock_in((struct rcom_lock *) rc->rc_buf); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c403b66ec83..a4b142a6a2c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -251,8 +251,19 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(inode->i_state & I_WILL_FREE); if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { + struct address_space *mapping = inode->i_mapping; + int ret; + list_move(&inode->i_list, &inode->i_sb->s_dirty); - return 0; + + /* + * Even if we don't actually write the inode itself here, + * we can at least start some of the data writeout.. + */ + spin_unlock(&inode_lock); + ret = do_writepages(mapping, wbc); + spin_lock(&inode_lock); + return ret; } /* diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 8c58bd45399..1794305f9ed 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -193,8 +193,12 @@ static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, static void fuse_ctl_kill_sb(struct super_block *sb) { + struct fuse_conn *fc; + mutex_lock(&fuse_mutex); fuse_control_sb = NULL; + list_for_each_entry(fc, &fuse_conn_list, entry) + fc->ctl_ndents = 0; mutex_unlock(&fuse_mutex); kill_litter_super(sb); diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 6a2ffa2db14..de8e64c03f7 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -4,44 +4,43 @@ config GFS2_FS select FS_POSIX_ACL select CRC32 help - A cluster filesystem. + A cluster filesystem. - Allows a cluster of computers to simultaneously use a block device - that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads - and writes to the block device like a local filesystem, but also uses - a lock module to allow the computers coordinate their I/O so - filesystem consistency is maintained. One of the nifty features of - GFS is perfect consistency -- changes made to the filesystem on one - machine show up immediately on all other machines in the cluster. + Allows a cluster of computers to simultaneously use a block device + that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads + and writes to the block device like a local filesystem, but also uses + a lock module to allow the computers coordinate their I/O so + filesystem consistency is maintained. One of the nifty features of + GFS is perfect consistency -- changes made to the filesystem on one + machine show up immediately on all other machines in the cluster. - To use the GFS2 filesystem, you will need to enable one or more of - the below locking modules. Documentation and utilities for GFS2 can - be found here: http://sources.redhat.com/cluster + To use the GFS2 filesystem, you will need to enable one or more of + the below locking modules. Documentation and utilities for GFS2 can + be found here: http://sources.redhat.com/cluster config GFS2_FS_LOCKING_NOLOCK tristate "GFS2 \"nolock\" locking module" depends on GFS2_FS help - Single node locking module for GFS2. + Single node locking module for GFS2. - Use this module if you want to use GFS2 on a single node without - its clustering features. You can still take advantage of the - large file support, and upgrade to running a full cluster later on - if required. + Use this module if you want to use GFS2 on a single node without + its clustering features. You can still take advantage of the + large file support, and upgrade to running a full cluster later on + if required. - If you will only be using GFS2 in cluster mode, you do not need this - module. + If you will only be using GFS2 in cluster mode, you do not need this + module. config GFS2_FS_LOCKING_DLM tristate "GFS2 DLM locking module" - depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) + depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) select IP_SCTP if DLM_SCTP select CONFIGFS_FS select DLM help - Multiple node locking module for GFS2 - - Most users of GFS2 will require this module. It provides the locking - interface between GFS2 and the DLM, which is required to use GFS2 - in a cluster environment. + Multiple node locking module for GFS2 + Most users of GFS2 will require this module. It provides the locking + interface between GFS2 and the DLM, which is required to use GFS2 + in a cluster environment. diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8240c1ff94f..113f6c9110c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_free_data(ip, bstart, blen); } - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, dibh->b_data); @@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) } ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); @@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (!error) { ip->i_di.di_size = size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_num.no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); } - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0fdcb7713cd..c93ca8f361b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -229,7 +229,7 @@ out: if (ip->i_di.di_size < offset + copied) ip->i_di.di_size = offset + copied; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b) */ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, - void *opaque, gfs2_filldir_t filldir, + void *opaque, filldir_t filldir, const struct gfs2_dirent **darr, u32 entries, int *copied) { const struct gfs2_dirent *dent, *dent_next; - struct gfs2_inum_host inum; u64 off, off_next; unsigned int x, y; int run = 0; @@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, *offset = off; } - gfs2_inum_in(&inum, (char *)&dent->de_inum); - error = filldir(opaque, (const char *)(dent + 1), be16_to_cpu(dent->de_name_len), - off, &inum, + off, be64_to_cpu(dent->de_inum.no_addr), be16_to_cpu(dent->de_type)); if (error) return 1; @@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, } static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir, int *copied, - unsigned *depth, u64 leaf_no) + filldir_t filldir, int *copied, unsigned *depth, + u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1343,7 +1340,7 @@ out: */ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir) + filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1402,7 +1399,7 @@ out: } int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - gfs2_filldir_t filldir) + filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); struct dirent_gather g; @@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, break; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; - ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; @@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_consist_inode(dip); gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; - dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, gfs2_trans_add_bh(dip->i_gl, bh, 1); } - dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); + dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index b21b33668a5..48fe89046bb 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -16,30 +16,13 @@ struct inode; struct gfs2_inode; struct gfs2_inum; -/** - * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read() - * @opaque: opaque data used by the function - * @name: the name of the directory entry - * @length: the length of the name - * @offset: the entry's offset in the directory - * @inum: the inode number the entry points to - * @type: the type of inode the entry points to - * - * Returns: 0 on success, 1 if buffer full - */ - -typedef int (*gfs2_filldir_t) (void *opaque, - const char *name, unsigned int length, - u64 offset, - struct gfs2_inum_host *inum, unsigned int type); - int gfs2_dir_search(struct inode *dir, const struct qstr *filename, struct gfs2_inum_host *inum, unsigned int *type); int gfs2_dir_add(struct inode *inode, const struct qstr *filename, const struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); -int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, - gfs2_filldir_t filldir); +int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, + filldir_t filldir); int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, struct gfs2_inum_host *new_inum, unsigned int new_type); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index ebebbdcd705..0c83c7f4dda 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 438146904b5..6618c119025 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -19,6 +19,8 @@ #include <linux/gfs2_ondisk.h> #include <linux/list.h> #include <linux/lm_interface.h> +#include <linux/wait.h> +#include <linux/rwsem.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -33,11 +35,6 @@ #include "super.h" #include "util.h" -struct greedy { - struct gfs2_holder gr_gh; - struct delayed_work gr_work; -}; - struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; @@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); static int dump_glock(struct gfs2_glock *gl); static int dump_inode(struct gfs2_inode *ip); +static void gfs2_glock_xmote_th(struct gfs2_holder *gh); +static void gfs2_glock_drop_th(struct gfs2_glock *gl); +static DECLARE_RWSEM(gfs2_umount_flush_sem); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) @@ -213,30 +213,6 @@ out: } /** - * queue_empty - check to see if a glock's queue is empty - * @gl: the glock - * @head: the head of the queue to check - * - * This function protects the list in the event that a process already - * has a holder on the list and is adding a second holder for itself. - * The glmutex lock is what generally prevents processes from working - * on the same glock at once, but the special case of adding a second - * holder for yourself ("recursive" locking) doesn't involve locking - * glmutex, making the spin lock necessary. - * - * Returns: 1 if the queue is empty - */ - -static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) -{ - int empty; - spin_lock(&gl->gl_spin); - empty = list_empty(head); - spin_unlock(&gl->gl_spin); - return empty; -} - -/** * search_bucket() - Find struct gfs2_glock by lock number * @bucket: the bucket to search * @name: The lock name @@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, gh->gh_flags = flags; gh->gh_error = 0; gh->gh_iflags = 0; - init_completion(&gh->gh_wait); - - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - gfs2_glock_hold(gl); } @@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * { gh->gh_state = state; gh->gh_flags = flags; - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - gh->gh_iflags &= 1 << HIF_ALLOCED; gh->gh_ip = (unsigned long)__builtin_return_address(0); } @@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh) kfree(gh); } +static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) +{ + if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { + gfs2_holder_put(gh); + return; + } + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); +} + +static int holder_wait(void *word) +{ + schedule(); + return 0; +} + +static void wait_on_holder(struct gfs2_holder *gh) +{ + might_sleep(); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); +} + /** * rq_mutex - process a mutex request in the queue * @gh: the glock holder @@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh) list_del_init(&gh->gh_list); /* gh->gh_error never examined. */ set_bit(GLF_LOCK, &gl->gl_flags); - complete(&gh->gh_wait); + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); return 1; } @@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { if (list_empty(&gl->gl_holders)) { @@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh) gfs2_reclaim_glock(sdp); } - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); + gfs2_glock_xmote_th(gh); spin_lock(&gl->gl_spin); } return 1; @@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh) set_bit(GLF_LOCK, &gl->gl_flags); } else { struct gfs2_holder *next_gh; - if (gh->gh_flags & GL_LOCAL_EXCL) + if (gh->gh_state == LM_ST_EXCLUSIVE) return 1; next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); - if (next_gh->gh_flags & GL_LOCAL_EXCL) + if (next_gh->gh_state == LM_ST_EXCLUSIVE) return 1; } @@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh) gh->gh_error = 0; set_bit(HIF_HOLDER, &gh->gh_iflags); - complete(&gh->gh_wait); + gfs2_holder_dispose_or_wake(gh); return 0; } @@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh) static int rq_demote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; if (!list_empty(&gl->gl_holders)) return 1; @@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh) list_del_init(&gh->gh_list); gh->gh_error = 0; spin_unlock(&gl->gl_spin); - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); + gfs2_holder_dispose_or_wake(gh); spin_lock(&gl->gl_spin); } else { gl->gl_req_gh = gh; @@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh) if (gh->gh_state == LM_ST_UNLOCKED || gl->gl_state != LM_ST_EXCLUSIVE) - glops->go_drop_th(gl); + gfs2_glock_drop_th(gl); else - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); + gfs2_glock_xmote_th(gh); spin_lock(&gl->gl_spin); } @@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh) } /** - * rq_greedy - process a queued request to drop greedy status - * @gh: the glock holder - * - * Returns: 1 if the queue is blocked - */ - -static int rq_greedy(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. */ - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - gfs2_holder_uninit(gh); - kfree(container_of(gh, struct greedy, gr_gh)); - - spin_lock(&gl->gl_spin); - - return 0; -} - -/** * run_queue - process holder structures on a glock * @gl: the glock * @@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl) if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) blocked = rq_demote(gh); - else if (test_bit(HIF_GREEDY, &gh->gh_iflags)) - blocked = rq_greedy(gh); else gfs2_assert_warn(gl->gl_sbd, 0); @@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) gfs2_holder_init(gl, 0, 0, &gh); set_bit(HIF_MUTEX, &gh.gh_iflags); + if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) + BUG(); spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { @@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) } else { gl->gl_owner = current; gl->gl_ip = (unsigned long)__builtin_return_address(0); - complete(&gh.gh_wait); + clear_bit(HIF_WAIT, &gh.gh_iflags); + smp_mb(); + wake_up_bit(&gh.gh_iflags, HIF_WAIT); } spin_unlock(&gl->gl_spin); - wait_for_completion(&gh.gh_wait); + wait_on_holder(&gh); gfs2_holder_uninit(&gh); } @@ -774,6 +740,7 @@ restart: return; set_bit(HIF_DEMOTE, &new_gh->gh_iflags); set_bit(HIF_DEALLOC, &new_gh->gh_iflags); + set_bit(HIF_WAIT, &new_gh->gh_iflags); goto restart; } @@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) int op_done = 1; gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); state_change(gl, ret & LM_OUT_ST_MASK); @@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_dispose_or_wake(gh); } /** @@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) +void gfs2_glock_xmote_th(struct gfs2_holder *gh) { + struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; + int flags = gh->gh_flags; + unsigned state = gh->gh_state; const struct gfs2_glock_operations *glops = gl->gl_ops; int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_ANY | LM_FLAG_PRIORITY); unsigned int lck_ret; + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); gfs2_assert_warn(sdp, state != gl->gl_state); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl); - gfs2_glock_hold(gl); gl->gl_req_bh = xmote_bh; @@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh = gl->gl_req_gh; - clear_bit(GLF_PREFETCH, &gl->gl_flags); - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); @@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_dispose_or_wake(gh); } /** @@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_drop_th(struct gfs2_glock *gl) +static void gfs2_glock_drop_th(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned int ret; + if (glops->go_drop_th) + glops->go_drop_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl); - gfs2_glock_hold(gl); gl->gl_req_bh = drop_bh; @@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) if (gh->gh_flags & LM_FLAG_PRIORITY) do_cancels(gh); - wait_for_completion(&gh->gh_wait); - + wait_on_holder(gh); if (gh->gh_error) return gh->gh_error; @@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_holder *existing; BUG_ON(!gh->gh_owner); + if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) + BUG(); existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); if (existing) { @@ -1227,8 +1188,6 @@ restart: } } - clear_bit(GLF_PREFETCH, &gl->gl_flags); - return error; } @@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) } /** - * gfs2_glock_prefetch - Try to prefetch a glock - * @gl: the glock - * @state: the state to prefetch in - * @flags: flags passed to go_xmote_th() - * - */ - -static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, - int flags) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - - spin_lock(&gl->gl_spin); - - if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) || - !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) || - !list_empty(&gl->gl_waiters3) || - relaxed_state_ok(gl->gl_state, state, flags)) { - spin_unlock(&gl->gl_spin); - return; - } - - set_bit(GLF_PREFETCH, &gl->gl_flags); - set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - glops->go_xmote_th(gl, state, flags); -} - -static void greedy_work(struct work_struct *work) -{ - struct greedy *gr = container_of(work, struct greedy, gr_work.work); - struct gfs2_holder *gh = &gr->gr_gh; - struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - - if (glops->go_greedy) - glops->go_greedy(gl); - - spin_lock(&gl->gl_spin); - - if (list_empty(&gl->gl_waiters2)) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_holder_uninit(gh); - kfree(gr); - } else { - gfs2_glock_hold(gl); - list_add_tail(&gh->gh_list, &gl->gl_waiters2); - run_queue(gl); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); - } -} - -/** - * gfs2_glock_be_greedy - - * @gl: - * @time: - * - * Returns: 0 if go_greedy will be called, 1 otherwise - */ - -int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time) -{ - struct greedy *gr; - struct gfs2_holder *gh; - - if (!time || gl->gl_sbd->sd_args.ar_localcaching || - test_and_set_bit(GLF_GREEDY, &gl->gl_flags)) - return 1; - - gr = kmalloc(sizeof(struct greedy), GFP_KERNEL); - if (!gr) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - return 1; - } - gh = &gr->gr_gh; - - gfs2_holder_init(gl, 0, 0, gh); - set_bit(HIF_GREEDY, &gh->gh_iflags); - INIT_DELAYED_WORK(&gr->gr_work, greedy_work); - - set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - schedule_delayed_work(&gr->gr_work, time); - - return 0; -} - -/** * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it * @gh: the holder structure * @@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) return 1; if (a->ln_number < b->ln_number) return -1; - if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) - return 1; - if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) - return 1; + BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); return 0; } @@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) } /** - * gfs2_glock_prefetch_num - prefetch a glock based on lock number - * @sdp: the filesystem - * @number: the lock number - * @glops: the glock operations for the type of glock - * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition - * - * Returns: errno - */ - -void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, int flags) -{ - struct gfs2_glock *gl; - int error; - - if (atomic_read(&sdp->sd_reclaim_count) < - gfs2_tune_get(sdp, gt_reclaim_limit)) { - error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); - if (!error) { - gfs2_glock_prefetch(gl, state, flags); - gfs2_glock_put(gl); - } - } -} - -/** * gfs2_lvb_hold - attach a LVB from a glock * @gl: The glock in question * @@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, if (!gl) return; - if (gl->gl_ops->go_callback) - gl->gl_ops->go_callback(gl, state); handle_callback(gl, state); spin_lock(&gl->gl_spin); @@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) struct lm_async_cb *async = data; struct gfs2_glock *gl; + down_read(&gfs2_umount_flush_sem); gl = gfs2_glock_find(sdp, &async->lc_name); if (gfs2_assert_warn(sdp, gl)) return; if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) gl->gl_req_bh(gl, async->lc_ret); gfs2_glock_put(gl); + up_read(&gfs2_umount_flush_sem); return; } @@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) static int demote_ok(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; int demote = 1; if (test_bit(GLF_STICKY, &gl->gl_flags)) demote = 0; - else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) - demote = time_after_eq(jiffies, gl->gl_stamp + - gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); else if (glops->go_demote_ok) demote = glops->go_demote_ok(gl); @@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) atomic_inc(&sdp->sd_reclaimed); if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED); gfs2_glmutex_unlock(gl); @@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl) return; if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) goto out_schedule; gfs2_glmutex_unlock(gl); @@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl) } if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED); gfs2_glmutex_unlock(gl); @@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) t = jiffies; } + down_write(&gfs2_umount_flush_sem); invalidate_inodes(sdp->sd_vfs); + up_write(&gfs2_umount_flush_sem); msleep(10); } } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index fb39108fc05..f50e40ceca4 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -20,7 +20,6 @@ #define LM_FLAG_ANY 0x00000008 #define LM_FLAG_PRIORITY 0x00000010 */ -#define GL_LOCAL_EXCL 0x00000020 #define GL_ASYNC 0x00000040 #define GL_EXACT 0x00000080 #define GL_SKIP 0x00000100 @@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh); void gfs2_holder_uninit(struct gfs2_holder *gh); - -void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags); -void gfs2_glock_drop_th(struct gfs2_glock *gl); - int gfs2_glock_nq(struct gfs2_holder *gh); int gfs2_glock_poll(struct gfs2_holder *gh); int gfs2_glock_wait(struct gfs2_holder *gh); void gfs2_glock_dq(struct gfs2_holder *gh); -int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time); - void gfs2_glock_dq_uninit(struct gfs2_holder *gh); int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, @@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); -void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, int flags); - /** * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock * @gl: the glock diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index b068d10bcb6..c4b0391b7aa 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) static void meta_go_sync(struct gfs2_glock *gl) { + if (gl->gl_state != LM_ST_EXCLUSIVE) + return; + if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { gfs2_log_flush(gl->gl_sbd, gl); gfs2_meta_sync(gl); gfs2_ail_empty_gl(gl); } - } /** @@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) } /** + * inode_go_sync - Sync the dirty data and/or metadata for an inode glock + * @gl: the glock protecting the inode + * + */ + +static void inode_go_sync(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip = gl->gl_object; + + if (ip && !S_ISREG(ip->i_inode.i_mode)) + ip = NULL; + + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { + gfs2_log_flush(gl->gl_sbd, gl); + if (ip) + filemap_fdatawrite(ip->i_inode.i_mapping); + gfs2_meta_sync(gl); + if (ip) { + struct address_space *mapping = ip->i_inode.i_mapping; + int error = filemap_fdatawait(mapping); + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else if (error) + set_bit(AS_EIO, &mapping->flags); + } + clear_bit(GLF_DIRTY, &gl->gl_flags); + gfs2_ail_empty_gl(gl); + } +} + +/** * inode_go_xmote_th - promote/demote a glock * @gl: the glock * @state: the requested state @@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) * */ -static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, - int flags) +static void inode_go_xmote_th(struct gfs2_glock *gl) { if (gl->gl_state != LM_ST_UNLOCKED) gfs2_pte_inval(gl); - gfs2_glock_xmote_th(gl, state, flags); + if (gl->gl_state == LM_ST_EXCLUSIVE) + inode_go_sync(gl); } /** @@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) static void inode_go_drop_th(struct gfs2_glock *gl) { gfs2_pte_inval(gl); - gfs2_glock_drop_th(gl); -} - -/** - * inode_go_sync - Sync the dirty data and/or metadata for an inode glock - * @gl: the glock protecting the inode - * - */ - -static void inode_go_sync(struct gfs2_glock *gl) -{ - struct gfs2_inode *ip = gl->gl_object; - - if (ip && !S_ISREG(ip->i_inode.i_mode)) - ip = NULL; - - if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - gfs2_log_flush(gl->gl_sbd, gl); - if (ip) - filemap_fdatawrite(ip->i_inode.i_mapping); - gfs2_meta_sync(gl); - if (ip) { - struct address_space *mapping = ip->i_inode.i_mapping; - int error = filemap_fdatawait(mapping); - if (error == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else if (error) - set_bit(AS_EIO, &mapping->flags); - } - clear_bit(GLF_DIRTY, &gl->gl_flags); - gfs2_ail_empty_gl(gl); - } + if (gl->gl_state == LM_ST_EXCLUSIVE) + inode_go_sync(gl); } /** @@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh) if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && - (gh->gh_flags & GL_LOCAL_EXCL)) + (gh->gh_state == LM_ST_EXCLUSIVE)) error = gfs2_truncatei_resume(ip); return error; @@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh) } /** - * inode_greedy - - * @gl: the glock - * - */ - -static void inode_greedy(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_inode *ip = gl->gl_object; - unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); - unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); - unsigned int new_time; - - spin_lock(&ip->i_spin); - - if (time_after(ip->i_last_pfault + quantum, jiffies)) { - new_time = ip->i_greedy + quantum; - if (new_time > max) - new_time = max; - } else { - new_time = ip->i_greedy - quantum; - if (!new_time || new_time > max) - new_time = 1; - } - - ip->i_greedy = new_time; - - spin_unlock(&ip->i_spin); - - iput(&ip->i_inode); -} - -/** * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock * @gl: the glock * @@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) * */ -static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, - int flags) +static void trans_go_xmote_th(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; @@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } - - gfs2_glock_xmote_th(gl, state, flags); } /** @@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl) gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } - - gfs2_glock_drop_th(gl); } /** @@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) } const struct gfs2_glock_operations gfs2_meta_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, + .go_xmote_th = meta_go_sync, + .go_drop_th = meta_go_sync, .go_type = LM_TYPE_META, }; @@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_xmote_th = inode_go_xmote_th, .go_xmote_bh = inode_go_xmote_bh, .go_drop_th = inode_go_drop_th, - .go_sync = inode_go_sync, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, .go_unlock = inode_go_unlock, - .go_greedy = inode_greedy, .go_type = LM_TYPE_INODE, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, - .go_sync = meta_go_sync, .go_inval = meta_go_inval, .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, @@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = { }; const struct gfs2_glock_operations gfs2_iopen_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_IOPEN, }; const struct gfs2_glock_operations gfs2_flock_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_FLOCK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_NONDISK, }; const struct gfs2_glock_operations gfs2_quota_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_demote_ok = quota_go_demote_ok, .go_type = LM_TYPE_QUOTA, }; const struct gfs2_glock_operations gfs2_journal_glops = { - .go_xmote_th = gfs2_glock_xmote_th, - .go_drop_th = gfs2_glock_drop_th, .go_type = LM_TYPE_JOURNAL, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 734421edae8..12c80fd28db 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -101,17 +101,14 @@ struct gfs2_bufdata { }; struct gfs2_glock_operations { - void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); + void (*go_xmote_th) (struct gfs2_glock *gl); void (*go_xmote_bh) (struct gfs2_glock *gl); void (*go_drop_th) (struct gfs2_glock *gl); void (*go_drop_bh) (struct gfs2_glock *gl); - void (*go_sync) (struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - void (*go_callback) (struct gfs2_glock *gl, unsigned int state); - void (*go_greedy) (struct gfs2_glock *gl); const int go_type; }; @@ -120,7 +117,6 @@ enum { HIF_MUTEX = 0, HIF_PROMOTE = 1, HIF_DEMOTE = 2, - HIF_GREEDY = 3, /* States */ HIF_ALLOCED = 4, @@ -128,6 +124,7 @@ enum { HIF_HOLDER = 6, HIF_FIRST = 7, HIF_ABORTED = 9, + HIF_WAIT = 10, }; struct gfs2_holder { @@ -140,17 +137,14 @@ struct gfs2_holder { int gh_error; unsigned long gh_iflags; - struct completion gh_wait; unsigned long gh_ip; }; enum { GLF_LOCK = 1, GLF_STICKY = 2, - GLF_PREFETCH = 3, GLF_DIRTY = 5, GLF_SKIP_WAITERS2 = 6, - GLF_GREEDY = 7, }; struct gfs2_glock { @@ -167,7 +161,7 @@ struct gfs2_glock { unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ - struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ + struct list_head gl_waiters2; /* HIF_DEMOTE */ struct list_head gl_waiters3; /* HIF_PROMOTE */ const struct gfs2_glock_operations *gl_ops; @@ -236,7 +230,6 @@ struct gfs2_inode { spinlock_t i_spin; struct rw_semaphore i_rw_mutex; - unsigned int i_greedy; unsigned long i_last_pfault; struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; @@ -418,17 +411,12 @@ struct gfs2_tune { unsigned int gt_atime_quantum; /* Min secs between atime updates */ unsigned int gt_new_files_jdata; unsigned int gt_new_files_directio; - unsigned int gt_max_atomic_write; /* Split big writes into this size */ unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ unsigned int gt_lockdump_size; unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ unsigned int gt_entries_per_readdir; - unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */ - unsigned int gt_greedy_default; - unsigned int gt_greedy_quantum; - unsigned int gt_greedy_max; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d122074c45e..0d6831a4056 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -287,10 +287,8 @@ out: * * Returns: errno */ - int gfs2_change_nlink(struct gfs2_inode *ip, int diff) { - struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info; struct buffer_head *dibh; u32 nlink; int error; @@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) else drop_nlink(&ip->i_inode); - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); mark_inode_dirty(&ip->i_inode); - if (ip->i_inode.i_nlink == 0) { - struct gfs2_rgrpd *rgd; - struct gfs2_holder ri_gh, rg_gh; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto out; - error = -EIO; - rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); - if (!rgd) - goto out_norgrp; - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); - if (error) - goto out_norgrp; - + if (ip->i_inode.i_nlink == 0) gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ - gfs2_glock_dq_uninit(&rg_gh); -out_norgrp: - gfs2_glock_dq_uninit(&ri_gh); - } -out: + return error; } struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) { struct qstr qstr; + struct inode *inode; gfs2_str2qstr(&qstr, name); - return gfs2_lookupi(dip, &qstr, 1, NULL); + inode = gfs2_lookupi(dip, &qstr, 1, NULL); + /* gfs2_lookupi has inconsistent callers: vfs + * related routines expect NULL for no entry found, + * gfs2_lookup_simple callers expect ENOENT + * and do not check for NULL. + */ + if (inode == NULL) + return ERR_PTR(-ENOENT); + else + return inode; } @@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) * @is_root: If 1, ignore the caller's permissions * @i_gh: An uninitialized holder for the new inode glock * - * There will always be a vnode (Linux VFS inode) for the d_gh inode unless - * @is_root is true. + * This can be called via the VFS filldir function when NFS is doing + * a readdirplus and the inode which its intending to stat isn't + * already in cache. In this case we must not take the directory glock + * again, since the readdir call will have already taken that lock. * * Returns: errno */ @@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct gfs2_holder d_gh; struct gfs2_inum_host inum; unsigned int type; - int error = 0; + int error; struct inode *inode = NULL; + int unlock = 0; if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, return dir; } - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - return ERR_PTR(error); + if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + return ERR_PTR(error); + unlock = 1; + } if (!is_root) { error = permission(dir, MAY_EXEC, NULL); @@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, inode = gfs2_inode_lookup(sb, &inum, type); out: - gfs2_glock_dq_uninit(&d_gh); + if (unlock) + gfs2_glock_dq_uninit(&d_gh); if (error == -ENOENT) return NULL; - return inode; + return inode ? inode : ERR_PTR(error); } static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index effe4a337c1..e30673dd37e 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c @@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) vprintk(fmt, args); va_end(args); - fs_err(sdp, "about to withdraw from the cluster\n"); + fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); - - fs_err(sdp, "waiting for outstanding I/O\n"); - - /* FIXME: suspend dm device so oustanding bio's complete - and all further io requests fail */ - fs_err(sdp, "telling LM to withdraw\n"); gfs2_withdraw_lockproto(&sdp->sd_lockstruct); fs_err(sdp, "withdrawn\n"); diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 33af707a4d3..a87c7bf3c56 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -36,7 +36,7 @@ #define GDLM_STRNAME_BYTES 24 #define GDLM_LVB_SIZE 32 -#define GDLM_DROP_COUNT 50000 +#define GDLM_DROP_COUNT 200000 #define GDLM_DROP_PERIOD 60 #define GDLM_NAME_LEN 128 diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index 2194b1d5b5e..a0e7eda643e 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c @@ -11,9 +11,6 @@ #include "lock_dlm.h" -extern int gdlm_drop_count; -extern int gdlm_drop_period; - extern struct lm_lockops gdlm_ops; static int __init init_lock_dlm(void) @@ -40,9 +37,6 @@ static int __init init_lock_dlm(void) return error; } - gdlm_drop_count = GDLM_DROP_COUNT; - gdlm_drop_period = GDLM_DROP_PERIOD; - printk(KERN_INFO "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index cdd1694e889..1d8faa3da8a 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -9,8 +9,6 @@ #include "lock_dlm.h" -int gdlm_drop_count; -int gdlm_drop_period; const struct lm_lockops gdlm_ops; @@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, if (!ls) return NULL; - ls->drop_locks_count = gdlm_drop_count; - ls->drop_locks_period = gdlm_drop_period; + ls->drop_locks_count = GDLM_DROP_COUNT; + ls->drop_locks_period = GDLM_DROP_PERIOD; ls->fscb = cb; ls->sdp = sdp; ls->fsflags = flags; diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 29ae06f9494..4746b884662 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c @@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) return sprintf(buf, "%d\n", ls->recover_jid_status); } +static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) +{ + return sprintf(buf, "%d\n", ls->drop_locks_count); +} + +static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) +{ + ls->drop_locks_count = simple_strtol(buf, NULL, 0); + return len; +} + struct gdlm_attr { struct attribute attr; ssize_t (*show)(struct gdlm_ls *, char *); @@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0644, recover_show, recover_store); GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); static struct attribute *gdlm_attrs[] = { &gdlm_attr_proto_name.attr, @@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = { &gdlm_attr_recover.attr, &gdlm_attr_recover_done.attr, &gdlm_attr_recover_status.attr, + &gdlm_attr_drop_count.attr, NULL, }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d7f94d8c7b..16bb4b4561a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_trans *tr; - if (!list_empty(&bd->bd_list_tr)) + gfs2_log_lock(sdp); + if (!list_empty(&bd->bd_list_tr)) { + gfs2_log_unlock(sdp); return; - + } tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_log_unlock(sdp); if (!list_empty(&le->le_list)) return; @@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); - gfs2_log_lock(sdp); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); @@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct list_head *head = &tr->tr_list_buf; struct gfs2_bufdata *bd; + gfs2_log_lock(sdp); while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); list_del_init(&bd->bd_list_tr); tr->tr_num_buf--; } + gfs2_log_unlock(sdp); gfs2_assert_warn(sdp, !tr->tr_num_buf); } @@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); + gfs2_log_lock(sdp); tr->tr_touched = 1; if (list_empty(&bd->bd_list_tr) && (ip->i_di.di_flags & GFS2_DIF_JDATA)) { tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_log_unlock(sdp); gfs2_pin(sdp, bd->bd_bh); tr->tr_num_buf_new++; + } else { + gfs2_log_unlock(sdp); } gfs2_trans_add_gl(bd->bd_gl); gfs2_log_lock(sdp); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10..56e33590b65 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -16,6 +16,7 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/fs.h> +#include <linux/writeback.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> @@ -157,6 +158,32 @@ out_ignore: } /** + * gfs2_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: Write-back control + * + * For journaled files and/or ordered writes this just falls back to the + * kernel's default writepages path for now. We will probably want to change + * that eventually (i.e. when we look at allocate on flush). + * + * For the data=writeback case though we can already ignore buffer heads + * and write whole extents at once. This is a big reduction in the + * number of I/O requests we send and the bmap calls we make in this case. + */ +static int gfs2_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) + return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); + + return generic_writepages(mapping, wbc); +} + +/** * stuffed_readpage - Fill in a Linux page with stuffed file data * @ip: the inode * @page: the page @@ -256,7 +283,7 @@ out_unlock: * the page lock and the glock) and return having done no I/O. Its * obviously not something we'd want to do on too regular a basis. * Any I/O we ignore at this time will be done via readpage later. - * 2. We have to handle stuffed files here too. + * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as @@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - unsigned page_idx; - int ret; + int ret = 0; int do_unlock = 0; if (likely(file != &gfs2_internal_file_sentinel)) { @@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, goto out_unlock; } skip_lock: - if (gfs2_is_stuffed(ip)) { - struct pagevec lru_pvec; - pagevec_init(&lru_pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - prefetchw(&page->flags); - list_del(&page->lru); - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - ret = stuffed_readpage(ip, page); - unlock_page(page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); - } - } - pagevec_lru_add(&lru_pvec); - ret = 0; - } else { - /* What we really want to do .... */ + if (!gfs2_is_stuffed(ip)) ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); - } if (do_unlock) { gfs2_glock_dq_m(1, &gh); @@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); error = gfs2_glock_nq_atime(&ip->i_gh); if (unlikely(error)) { - if (error == GLR_TRYFAILED) + if (error == GLR_TRYFAILED) { + unlock_page(page); error = AOP_TRUNCATED_PAGE; + } goto out_uninit; } @@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) return; } +/** + * gfs2_ok_for_dio - check that dio is valid on this file + * @ip: The inode + * @rw: READ or WRITE + * @offset: The offset at which we are reading or writing + * + * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) + * 1 (to accept the i/o request) + */ +static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +{ + /* + * Should we return an error here? I can't see that O_DIRECT for + * a journaled file makes any sense. For now we'll silently fall + * back to buffered I/O, likewise we do the same for stuffed + * files since they are (a) small and (b) unaligned. + */ + if (gfs2_is_jdata(ip)) + return 0; + + if (gfs2_is_stuffed(ip)) + return 0; + + if (offset > i_size_read(&ip->i_inode)) + return 0; + return 1; +} + + + static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, struct gfs2_holder gh; int rv; - if (rw == READ) - mutex_lock(&inode->i_mutex); /* - * Shared lock, even if its a write, since we do no allocation - * on this path. All we need change is atime. + * Deferred lock, even if its a write, since we do no allocation + * on this path. All we need change is atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately have the option of only flushing a range like + * the VFS does. */ - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); rv = gfs2_glock_nq_atime(&gh); if (rv) - goto out; - - if (offset > i_size_read(inode)) - goto out; - - /* - * Should we return an error here? I can't see that O_DIRECT for - * a journaled file makes any sense. For now we'll silently fall - * back to buffered I/O, likewise we do the same for stuffed - * files since they are (a) small and (b) unaligned. - */ - if (gfs2_is_jdata(ip)) - goto out; - - if (gfs2_is_stuffed(ip)) - goto out; - - rv = blockdev_direct_IO_own_locking(rw, iocb, inode, - inode->i_sb->s_bdev, - iov, offset, nr_segs, - gfs2_get_block_direct, NULL); + return rv; + rv = gfs2_ok_for_dio(ip, rw, offset); + if (rv != 1) + goto out; /* dio not valid, fall back to buffered i/o */ + + rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, + gfs2_get_block_direct, NULL); out: gfs2_glock_dq_m(1, &gh); gfs2_holder_uninit(&gh); - if (rw == READ) - mutex_unlock(&inode->i_mutex); - return rv; } @@ -763,6 +786,7 @@ out: const struct address_space_operations gfs2_file_aops = { .writepage = gfs2_writepage, + .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index d355899585d..9187eb174b4 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) struct gfs2_inum_host inum; unsigned int type; int error; + int had_lock=0; if (inode && is_bad_inode(inode)) goto invalid; @@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) if (sdp->sd_args.ar_localcaching) goto valid; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - goto fail; + had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); + if (!had_lock) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + } error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); switch (error) { @@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) } valid_gunlock: - gfs2_glock_dq_uninit(&d_gh); + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); valid: dput(parent); return 1; invalid_gunlock: - gfs2_glock_dq_uninit(&d_gh); + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); invalid: if (inode && S_ISDIR(inode->i_mode)) { if (have_submounts(dentry)) diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b4e7b877531..4855e8cca62 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -22,6 +22,7 @@ #include "glock.h" #include "glops.h" #include "inode.h" +#include "ops_dentry.h" #include "ops_export.h" #include "rgrp.h" #include "util.h" @@ -112,13 +113,12 @@ struct get_name_filldir { char *name; }; -static int get_name_filldir(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum_host *inum, - unsigned int type) +static int get_name_filldir(void *opaque, const char *name, int length, + loff_t offset, u64 inum, unsigned int type) { - struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; + struct get_name_filldir *gnfd = opaque; - if (!gfs2_inum_equal(inum, &gnfd->inum)) + if (inum != gnfd->inum.no_addr) return 0; memcpy(gnfd->name, name, length); @@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) return ERR_PTR(-ENOMEM); } + dentry->d_op = &gfs2_dops; return dentry; } @@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) } error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, - &i_gh); + LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return ERR_PTR(error); @@ -269,6 +269,7 @@ out_inode: return ERR_PTR(-ENOMEM); } + dentry->d_op = &gfs2_dops; return dentry; fail_rgd: diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index faa07e4b97d..c996aa739a0 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -43,15 +43,6 @@ #include "util.h" #include "eaops.h" -/* For regular, non-NFS */ -struct filldir_reg { - struct gfs2_sbd *fdr_sbd; - int fdr_prefetch; - - filldir_t fdr_filldir; - void *fdr_opaque; -}; - /* * Most fields left uninitialised to catch anybody who tries to * use them. f_flags set to prevent file_accessed() from touching @@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) } /** - * filldir_func - Report a directory entry to the caller of gfs2_dir_read() - * @opaque: opaque data used by the function - * @name: the name of the directory entry - * @length: the length of the name - * @offset: the entry's offset in the directory - * @inum: the inode number the entry points to - * @type: the type of inode the entry points to - * - * Returns: 0 on success, 1 if buffer full - */ - -static int filldir_func(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum_host *inum, - unsigned int type) -{ - struct filldir_reg *fdr = (struct filldir_reg *)opaque; - struct gfs2_sbd *sdp = fdr->fdr_sbd; - int error; - - error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, - inum->no_addr, type); - if (error) - return 1; - - if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { - gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); - gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops, - LM_ST_SHARED, LM_FLAG_TRY); - } - - return 0; -} - -/** * gfs2_readdir - Read directory entries from a directory * @file: The directory to read from * @dirent: Buffer for dirents @@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) { struct inode *dir = file->f_mapping->host; struct gfs2_inode *dip = GFS2_I(dir); - struct filldir_reg fdr; struct gfs2_holder d_gh; u64 offset = file->f_pos; int error; - fdr.fdr_sbd = GFS2_SB(dir); - fdr.fdr_prefetch = 1; - fdr.fdr_filldir = filldir; - fdr.fdr_opaque = dirent; - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); error = gfs2_glock_nq_atime(&d_gh); if (error) { @@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) return error; } - error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); + error = gfs2_dir_read(dir, &offset, dirent, filldir); gfs2_glock_dq_uninit(&d_gh); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index d14e139d267..ee80b8a5e7b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -867,9 +867,9 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, error = -EBUSY; goto error; } - mutex_lock(&sb->s_bdev->bd_mount_mutex); + down(&sb->s_bdev->bd_mount_sem); new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); - mutex_unlock(&sb->s_bdev->bd_mount_mutex); + up(&sb->s_bdev->bd_mount_sem); if (IS_ERR(new)) { error = PTR_ERR(new); goto error; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 636dda4c7d3..f40a84807d7 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[2]; + struct gfs2_holder ghs[3]; + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh; int error; + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + + + error = gfs2_glock_nq_m(3, ghs); if (error) goto out; @@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) out_end_trans: gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(2, ghs); + gfs2_glock_dq_m(3, ghs); out: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); + gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[2]; + struct gfs2_holder ghs[3]; + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh; int error; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + + error = gfs2_glock_nq_m(3, ghs); if (error) goto out; @@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(2, ghs); + gfs2_glock_dq_m(3, ghs); out: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); + gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[4], r_gh; + struct gfs2_holder ghs[5], r_gh; + struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; int alloc_required; @@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (nip) { gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); num_gh++; + /* grab the resource lock for unlink flag twiddling + * this is the case of the target file already existing + * so we unlink before doing the rename + */ + nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); + if (nrgd) + gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } error = gfs2_glock_nq_m(num_gh, ghs); @@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + al->al_rgd->rd_ri.ri_length + 4 * RES_DINODE + 4 * RES_LEAF + - RES_STATFS + RES_QUOTA, 0); + RES_STATFS + RES_QUOTA + 4, 0); if (error) goto out_ipreserv; } else { error = gfs2_trans_begin(sdp, 4 * RES_DINODE + - 5 * RES_LEAF, 0); + 5 * RES_LEAF + 4, 0); if (error) goto out_gunlock; } @@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - ip->i_inode.i_ctime.tv_sec = get_seconds(); + ip->i_inode.i_ctime = CURRENT_TIME_SEC; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, } generic_fillattr(inode, stat); - if (unlock); + if (unlock) gfs2_glock_dq_uninit(&gh); return 0; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 7685b46f934..47369d01121 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; int error; + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return; + for (;;) { error = gfs2_freeze_fs(sdp); if (!error) @@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode) } error = gfs2_dinode_dealloc(ip); + /* + * Must do this before unlock to avoid trying to write back + * potentially dirty data now that inode no longer exists + * on disk. + */ + truncate_inode_pages(&inode->i_data, 0); out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); @@ -443,14 +452,12 @@ out: static struct inode *gfs2_alloc_inode(struct super_block *sb) { - struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip; ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); if (ip) { ip->i_flags = 0; ip->i_gl = NULL; - ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); ip->i_last_pfault = jiffies; } return &ip->i_inode; diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 45a5f11fc39..14b380fb060 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -28,34 +28,13 @@ #include "trans.h" #include "util.h" -static void pfault_be_greedy(struct gfs2_inode *ip) -{ - unsigned int time; - - spin_lock(&ip->i_spin); - time = ip->i_greedy; - ip->i_last_pfault = jiffies; - spin_unlock(&ip->i_spin); - - igrab(&ip->i_inode); - if (gfs2_glock_be_greedy(ip->i_gl, time)) - iput(&ip->i_inode); -} - static struct page *gfs2_private_nopage(struct vm_area_struct *area, unsigned long address, int *type) { struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); - struct page *result; set_bit(GIF_PAGED, &ip->i_flags); - - result = filemap_nopage(area, address, type); - - if (result && result != NOPAGE_OOM) - pfault_be_greedy(ip); - - return result; + return filemap_nopage(area, address, type); } static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) @@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, set_page_dirty(result); } - pfault_be_greedy(ip); out: gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 43a24f2e590..70f424fcf1c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_atime_quantum = 3600; gt->gt_new_files_jdata = 0; gt->gt_new_files_directio = 0; - gt->gt_max_atomic_write = 4 << 20; gt->gt_max_readahead = 1 << 18; gt->gt_lockdump_size = 131072; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; gt->gt_reclaim_limit = 5000; gt->gt_entries_per_readdir = 32; - gt->gt_prefetch_secs = 10; - gt->gt_greedy_default = HZ / 10; - gt->gt_greedy_quantum = HZ / 40; - gt->gt_greedy_max = HZ / 4; gt->gt_statfs_quantum = 30; gt->gt_statfs_slow = 0; } @@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, - GL_LOCAL_EXCL, ji_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) struct gfs2_log_header_host head; int error; - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - GL_LOCAL_EXCL, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); if (error) return error; @@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp); gfs2_statfs_sync(sdp); - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - GL_LOCAL_EXCL | GL_NOCACHE, - &t_gh); + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 983eaf1e06b..d01f9f0fda2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0); TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(reclaim_limit, 0); -TUNE_ATTR(prefetch_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(new_files_directio, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(quota_cache_secs, 1); -TUNE_ATTR(max_atomic_write, 1); TUNE_ATTR(stall_secs, 1); -TUNE_ATTR(greedy_default, 1); -TUNE_ATTR(greedy_quantum, 1); -TUNE_ATTR(greedy_max, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_DAEMON(scand_secs, scand_process); TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); @@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_reclaim_limit.attr, - &tune_attr_prefetch_secs.attr, &tune_attr_statfs_slow.attr, &tune_attr_quota_simul_sync.attr, &tune_attr_quota_cache_secs.attr, - &tune_attr_max_atomic_write.attr, &tune_attr_stall_secs.attr, - &tune_attr_greedy_default.attr, - &tune_attr_greedy_quantum.attr, - &tune_attr_greedy_max.attr, &tune_attr_statfs_quantum.attr, &tune_attr_scand_secs.attr, &tune_attr_recoverd_secs.attr, diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index cca3fb693f9..70543b17e4c 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -76,7 +76,7 @@ extern int make_symlink(const char *from, const char *to); extern int unlink_file(const char *file); extern int do_mkdir(const char *file, int mode); extern int do_rmdir(const char *file); -extern int do_mknod(const char *file, int mode, int dev); +extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor); extern int link_file(const char *from, const char *to); extern int do_readlink(char *file, char *buf, int size); extern int rename_file(char *from, char *to); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 1e6fc379987..69a376f35a6 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -755,7 +755,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) goto out_put; init_special_inode(inode, mode, dev); - err = do_mknod(name, mode, dev); + err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); if(err) goto out_free; diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 23b7cee7212..1ed5ea389f1 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -295,11 +295,11 @@ int do_rmdir(const char *file) return(0); } -int do_mknod(const char *file, int mode, int dev) +int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) { int err; - err = mknod(file, mode, dev); + err = mknod(file, mode, makedev(major, minor)); if(err) return(-errno); return(0); } diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c index 077258b2103..5a95fbdd6fd 100644 --- a/fs/jffs/jffs_fm.c +++ b/fs/jffs/jffs_fm.c @@ -17,6 +17,7 @@ * */ #include <linux/slab.h> +#include <linux/err.h> #include <linux/blkdev.h> #include <linux/jffs.h> #include "jffs_fm.h" @@ -104,7 +105,7 @@ jffs_build_begin(struct jffs_control *c, int unit) mtd = get_mtd_device(NULL, unit); - if (!mtd) { + if (IS_ERR(mtd)) { kfree(fmc); DJM(no_jffs_fmcontrol--); return NULL; diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c index 72b4fc13a10..4189e4a3605 100644 --- a/fs/jffs2/debug.c +++ b/fs/jffs2/debug.c @@ -178,8 +178,8 @@ __jffs2_dbg_acct_paranoia_check_nolock(struct jffs2_sb_info *c, while (ref2) { uint32_t totlen = ref_totlen(c, jeb, ref2); - if (ref2->flash_offset < jeb->offset || - ref2->flash_offset > jeb->offset + c->sector_size) { + if (ref_offset(ref2) < jeb->offset || + ref_offset(ref2) > jeb->offset + c->sector_size) { JFFS2_ERROR("node_ref %#08x shouldn't be in block at %#08x.\n", ref_offset(ref2), jeb->offset); goto error; diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 3daf3bca037..f89c85d5a3f 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -13,6 +13,7 @@ #ifndef _JFFS2_DEBUG_H_ #define _JFFS2_DEBUG_H_ +#include <linux/sched.h> #ifndef CONFIG_JFFS2_FS_DEBUG #define CONFIG_JFFS2_FS_DEBUG 0 diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 7bc1a4201c0..abb90c0c09c 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -502,12 +502,11 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) if (ret) return ret; - c->inocache_list = kmalloc(INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *), GFP_KERNEL); + c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL); if (!c->inocache_list) { ret = -ENOMEM; goto out_wbuf; } - memset(c->inocache_list, 0, INOCACHE_HASHSIZE * sizeof(struct jffs2_inode_cache *)); jffs2_init_xattr_subsystem(c); diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index daff3341ff9..3a3cf225981 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -838,6 +838,8 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) { + cond_resched(); + /* We only care about obsolete ones */ if (!(ref_obsolete(raw))) continue; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 0ddfd70307f..4178b4b5594 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -294,23 +294,21 @@ static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev) static inline struct jffs2_node_frag *frag_first(struct rb_root *root) { - struct rb_node *node = root->rb_node; + struct rb_node *node = rb_first(root); if (!node) return NULL; - while(node->rb_left) - node = node->rb_left; + return rb_entry(node, struct jffs2_node_frag, rb); } static inline struct jffs2_node_frag *frag_last(struct rb_root *root) { - struct rb_node *node = root->rb_node; + struct rb_node *node = rb_last(root); if (!node) return NULL; - while(node->rb_right) - node = node->rb_right; + return rb_entry(node, struct jffs2_node_frag, rb); } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 266423b2709..58a0b912e9d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -944,13 +944,12 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) { struct jffs2_raw_inode n; - struct jffs2_inode_info *f = kmalloc(sizeof(*f), GFP_KERNEL); + struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL); int ret; if (!f) return -ENOMEM; - memset(f, 0, sizeof(*f)); init_MUTEX_LOCKED(&f->sem); f->inocache = ic; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index e2413466ddd..3af746eaff0 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -128,17 +128,19 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } if (jffs2_sum_active()) { - s = kmalloc(sizeof(struct jffs2_summary), GFP_KERNEL); + s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!s) { + kfree(flashbuf); JFFS2_WARNING("Can't allocate memory for summary\n"); return -ENOMEM; } - memset(s, 0, sizeof(struct jffs2_summary)); } for (i=0; i<c->nr_blocks; i++) { struct jffs2_eraseblock *jeb = &c->blocks[i]; + cond_resched(); + /* reset summary info for next eraseblock scan */ jffs2_sum_reset_collected(s); diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index e52cef526d9..25265965bdc 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -26,15 +26,13 @@ int jffs2_sum_init(struct jffs2_sb_info *c) { - c->summary = kmalloc(sizeof(struct jffs2_summary), GFP_KERNEL); + c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!c->summary) { JFFS2_WARNING("Can't allocate memory for summary information!\n"); return -ENOMEM; } - memset(c->summary, 0, sizeof(struct jffs2_summary)); - c->summary->sum_buf = vmalloc(c->sector_size); if (!c->summary->sum_buf) { @@ -398,6 +396,8 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras for (i=0; i<je32_to_cpu(summary->sum_num); i++) { dbg_summary("processing summary index %d\n", i); + cond_resched(); + /* Make sure there's a spare ref for dirty space */ err = jffs2_prealloc_raw_node_refs(c, jeb, 2); if (err) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7deb7825402..08a0e6c49e6 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/fs.h> +#include <linux/err.h> #include <linux/mount.h> #include <linux/jffs2.h> #include <linux/pagemap.h> @@ -184,9 +185,9 @@ static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type, struct mtd_info *mtd; mtd = get_mtd_device(NULL, mtdnr); - if (!mtd) { + if (IS_ERR(mtd)) { D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); - return -EINVAL; + return PTR_ERR(mtd); } return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); @@ -221,7 +222,7 @@ static int jffs2_get_sb(struct file_system_type *fs_type, D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4)); for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) { mtd = get_mtd_device(NULL, mtdnr); - if (mtd) { + if (!IS_ERR(mtd)) { if (!strcmp(mtd->name, dev_name+4)) return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); put_mtd_device(mtd); diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index fc211b6e9b0..b90d5aa3d96 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -51,7 +51,7 @@ static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) */ if (!p) { - printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n"); + printk(KERN_ERR "jffs2_follow_link(): can't find symlink target\n"); p = ERR_PTR(-EIO); } D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->target)); diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 70707309dfa..9c99859f5ed 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -969,8 +969,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, int oobsize = c->mtd->oobsize; struct mtd_oob_ops ops; - ops.len = NR_OOB_SCAN_PAGES * oobsize; - ops.ooblen = oobsize; + ops.ooblen = NR_OOB_SCAN_PAGES * oobsize; ops.oobbuf = c->oobbuf; ops.ooboffs = 0; ops.datbuf = NULL; @@ -983,10 +982,10 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, return ret; } - if (ops.retlen < ops.len) { + if (ops.oobretlen < ops.ooblen) { D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB " "returned short read (%zd bytes not %d) for block " - "at %08x\n", ops.retlen, ops.len, jeb->offset)); + "at %08x\n", ops.oobretlen, ops.ooblen, jeb->offset)); return -EIO; } @@ -1005,7 +1004,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, } /* we know, we are aligned :) */ - for (page = oobsize; page < ops.len; page += sizeof(long)) { + for (page = oobsize; page < ops.ooblen; page += sizeof(long)) { long dat = *(long *)(&ops.oobbuf[page]); if(dat != -1) return 1; @@ -1033,7 +1032,6 @@ int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, return 2; } - ops.len = oobsize; ops.ooblen = oobsize; ops.oobbuf = c->oobbuf; ops.ooboffs = 0; @@ -1048,10 +1046,10 @@ int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, return ret; } - if (ops.retlen < ops.len) { + if (ops.oobretlen < ops.ooblen) { D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): " "Read OOB return short read (%zd bytes not %d) " - "for block at %08x\n", ops.retlen, ops.len, + "for block at %08x\n", ops.oobretlen, ops.ooblen, jeb->offset)); return -EIO; } @@ -1090,8 +1088,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER); n.totlen = cpu_to_je32(8); - ops.len = c->fsdata_len; - ops.ooblen = c->fsdata_len;; + ops.ooblen = c->fsdata_len; ops.oobbuf = (uint8_t *)&n; ops.ooboffs = c->fsdata_pos; ops.datbuf = NULL; @@ -1105,10 +1102,10 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, jeb->offset, ret)); return ret; } - if (ops.retlen != ops.len) { + if (ops.oobretlen != ops.ooblen) { D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): " "Short write for block at %08x: %zd not %d\n", - jeb->offset, ops.retlen, ops.len)); + jeb->offset, ops.oobretlen, ops.ooblen)); return -EIO; } return 0; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 4da09ce1d1f..4bb3f189733 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -399,8 +399,6 @@ static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datu { /* must be called under down_write(xattr_sem) */ if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) { - uint32_t xid = xd->xid, version = xd->version; - unload_xattr_datum(c, xd); xd->flags |= JFFS2_XFLAGS_DEAD; if (xd->node == (void *)xd) { @@ -411,7 +409,8 @@ static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datu } spin_unlock(&c->erase_completion_lock); - dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version); + dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", + xd->xid, xd->version); } } diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 06270774516..f4d45d4d835 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -176,7 +176,7 @@ reclaimer(void *ptr) lock_kernel(); lockd_up(0); /* note: this cannot fail as lockd is already running */ - dprintk("lockd: reclaiming locks for host %s", host->h_name); + dprintk("lockd: reclaiming locks for host %s\n", host->h_name); restart: nsmstate = host->h_nsmstate; @@ -206,7 +206,7 @@ restart: host->h_reclaiming = 0; up_write(&host->h_rwsem); - dprintk("NLM: done reclaiming locks for host %s", host->h_name); + dprintk("NLM: done reclaiming locks for host %s\n", host->h_name); /* Now, wake up all processes that sleep on a blocked lock */ list_for_each_entry(block, &nlm_blocked, b_list) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dee3d6c0f19..d9ba8cb0ee7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -532,7 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) lock_kernel(); - res = nfs_revalidate_mapping(inode, filp->f_mapping); + res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); if (res < 0) { unlock_kernel(); return res; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0dd6be346aa..9e4a2b70995 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -315,14 +315,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { - /* - * Avoid deadlock on nfs_wait_on_request(). - */ - if (!(gfp & __GFP_FS)) - return 0; - /* Hack... Force nfs_wb_page() to write out the page */ - SetPageDirty(page); - return !nfs_wb_page(page->mapping->host, page); + /* If PagePrivate() is set, then the page is not freeable */ + return 0; +} + +static int nfs_launder_page(struct page *page) +{ + return nfs_wb_page(page->mapping->host, page); } const struct address_space_operations nfs_file_aops = { @@ -338,6 +337,7 @@ const struct address_space_operations nfs_file_aops = { #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif + .launder_page = nfs_launder_page, }; static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, @@ -434,8 +434,9 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) BUG(); } if (res < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); + dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" + " - error %d!\n", + __FUNCTION__, res); return res; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 63e47027930..d8349828283 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -665,49 +665,86 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } +static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (mapping->nrpages != 0) { + int ret = invalidate_inode_pages2(mapping); + if (ret < 0) + return ret; + } + spin_lock(&inode->i_lock); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute = jiffies; + } + spin_unlock(&inode->i_lock); + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + return 0; +} + +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + int ret = 0; + + mutex_lock(&inode->i_mutex); + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { + ret = nfs_sync_mapping(mapping); + if (ret == 0) + ret = nfs_invalidate_mapping_nolock(inode, mapping); + } + mutex_unlock(&inode->i_mutex); + return ret; +} + /** - * nfs_revalidate_mapping - Revalidate the pagecache + * nfs_revalidate_mapping_nolock - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; - if (NFS_STALE(inode)) - ret = -ESTALE; if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode)) + || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; + if (ret < 0) + goto out; + } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + ret = nfs_invalidate_mapping_nolock(inode, mapping); +out: + return ret; +} - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { - if (mapping->nrpages != 0) { - if (S_ISREG(inode->i_mode)) { - ret = nfs_sync_mapping(mapping); - if (ret < 0) - goto out; - } - ret = invalidate_inode_pages2(mapping); - if (ret < 0) - goto out; - } - spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) { - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - /* This ensures we revalidate child dentries */ - nfsi->cache_change_attribute = jiffies; - } - spin_unlock(&inode->i_lock); +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + * + * This version of the function will take the inode->i_mutex and attempt to + * flush out all dirty data if it needs to invalidate the page cache. + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret = 0; - nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + ret = nfs_invalidate_mapping(inode, mapping); out: return ret; } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 6c686112cc0..525c136c7d8 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -50,7 +50,9 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - void *err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); + void *err; + + err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 248dd92e6a5..49c310b8492 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -35,7 +35,6 @@ #include <linux/lockd/bind.h> #define NFSDDBG_FACILITY NFSDDBG_EXPORT -#define NFSD_PARANOIA 1 typedef struct auth_domain svc_client; typedef struct svc_export svc_export; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 277df40f098..e695660921e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -990,15 +990,16 @@ encode_entry(struct readdir_cd *ccd, const char *name, } int -nfs3svc_encode_entry(struct readdir_cd *cd, const char *name, - int namlen, loff_t offset, ino_t ino, unsigned int d_type) +nfs3svc_encode_entry(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) { return encode_entry(cd, name, namlen, offset, ino, d_type, 0); } int -nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name, - int namlen, loff_t offset, ino_t ino, unsigned int d_type) +nfs3svc_encode_entry_plus(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { return encode_entry(cd, name, namlen, offset, ino, d_type, 1); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fea46368afb..18aa9440df1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1880,9 +1880,10 @@ nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) } static int -nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen, - loff_t offset, ino_t ino, unsigned int d_type) +nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { + struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index b06bf9f70ef..c59d6fbb7a6 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -24,8 +24,6 @@ #include <linux/nfsd/nfsd.h> #define NFSDDBG_FACILITY NFSDDBG_FH -#define NFSD_PARANOIA 1 -/* #define NFSD_DEBUG_VERBOSE 1 */ static int nfsd_nr_verified; @@ -230,13 +228,12 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) error = nfserrno(PTR_ERR(dentry)); goto out; } -#ifdef NFSD_PARANOIA + if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); } -#endif fhp->fh_dentry = dentry; fhp->fh_export = exp; @@ -267,12 +264,13 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) /* Finally, check access permissions. */ error = nfsd_permission(exp, dentry, access); -#ifdef NFSD_PARANOIA_EXTREME if (error) { - printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24)); + dprintk("fh_verify: %s/%s permission failure, " + "acc=%x, error=%d\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + access, ntohl(error)); } -#endif out: if (exp && !IS_ERR(exp)) exp_put(exp); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0aaccb03bf7..fbf5d51947e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -72,7 +72,7 @@ static struct svc_program nfsd_acl_program = { .pg_prog = NFS_ACL_PROGRAM, .pg_nvers = NFSD_ACL_NRVERS, .pg_vers = nfsd_acl_versions, - .pg_name = "nfsd", + .pg_name = "nfsacl", .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, @@ -118,16 +118,16 @@ int nfsd_vers(int vers, enum vers_op change) switch(change) { case NFSD_SET: nfsd_versions[vers] = nfsd_version[vers]; - break; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) if (vers < NFSD_ACL_NRVERS) - nfsd_acl_version[vers] = nfsd_acl_version[vers]; + nfsd_acl_versions[vers] = nfsd_acl_version[vers]; #endif + break; case NFSD_CLEAR: nfsd_versions[vers] = NULL; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) if (vers < NFSD_ACL_NRVERS) - nfsd_acl_version[vers] = NULL; + nfsd_acl_versions[vers] = NULL; #endif break; case NFSD_TEST: diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index f5243f94399..6555c50d900 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -462,9 +462,10 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p, } int -nfssvc_encode_entry(struct readdir_cd *ccd, const char *name, - int namlen, loff_t offset, ino_t ino, unsigned int d_type) +nfssvc_encode_entry(void *ccdv, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) { + struct readdir_cd *ccd = ccdv; struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); __be32 *p = cd->buffer; int buflen, slen; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7a79c23aa6d..8283236c6a0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -59,7 +59,6 @@ #include <asm/uaccess.h> #define NFSDDBG_FACILITY NFSDDBG_FILEOP -#define NFSD_PARANOIA /* We must ignore files (but only files) which might have mandatory @@ -822,7 +821,8 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); - put_page(*pp); + if (*pp) + put_page(*pp); *pp = page; rqstp->rq_resused++; rqstp->rq_res.page_len += size; @@ -1244,7 +1244,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; int host_err; __u32 v_mtime=0, v_atime=0; - int v_mode=0; err = nfserr_perm; if (!flen) @@ -1281,16 +1280,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; if (createmode == NFS3_CREATE_EXCLUSIVE) { - /* while the verifier would fit in mtime+atime, - * solaris7 gets confused (bugid 4218508) if these have - * the high bit set, so we use the mode as well + /* solaris7 gets confused (bugid 4218508) if these have + * the high bit set, so just clear the high bits. */ v_mtime = verifier[0]&0x7fffffff; v_atime = verifier[1]&0x7fffffff; - v_mode = S_IFREG - | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */ - | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */ - ; } if (dchild->d_inode) { @@ -1318,7 +1312,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_mode == v_mode && dchild->d_inode->i_size == 0 ) break; /* fallthru */ @@ -1340,26 +1333,22 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, } if (createmode == NFS3_CREATE_EXCLUSIVE) { - /* Cram the verifier into atime/mtime/mode */ + /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME - | ATTR_MTIME_SET|ATTR_ATIME_SET - | ATTR_MODE; + | ATTR_MTIME_SET|ATTR_ATIME_SET; /* XXX someone who knows this better please fix it for nsec */ iap->ia_mtime.tv_sec = v_mtime; iap->ia_atime.tv_sec = v_atime; iap->ia_mtime.tv_nsec = 0; iap->ia_atime.tv_nsec = 0; - iap->ia_mode = v_mode; } /* Set file attributes. - * Mode has already been set but we might need to reset it - * for CREATE_EXCLUSIVE * Irix appears to send along the gid when it tries to * implement setgid directories via NFS. Clear out all that cruft. */ set_attr: - if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) { + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); if (err2) err = err2; @@ -1726,7 +1715,7 @@ out: */ __be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, - struct readdir_cd *cdp, encode_dent_fn func) + struct readdir_cd *cdp, filldir_t func) { __be32 err; int host_err; @@ -1751,7 +1740,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, do { cdp->err = nfserr_eof; /* will be cleared on successful read */ - host_err = vfs_readdir(file, (filldir_t) func, cdp); + host_err = vfs_readdir(file, func, cdp); } while (host_err >=0 && cdp->err == nfs_ok); if (host_err) err = nfserrno(host_err); diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 35cc4b1d60f..af4ef808fa9 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -17,6 +17,13 @@ ToDo/Notes: happen is unclear however so it is worth waiting until someone hits the problem. +2.1.28 - Fix a deadlock. + + - Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey + Vlasov for the report and detailed analysis of the deadlock. The fix + involved getting rid of ntfs_put_inode() altogether and hence NTFS no + longer has a ->put_inode super operation. + 2.1.27 - Various bug fixes and cleanups. - Fix two compiler warnings on Alpha. Thanks to Andrew Morton for diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index e27b4eacffb..82550838556 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.27\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.28\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 7b2c8f4f6a6..629e7abdd84 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -92,10 +92,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; + local_irq_save(flags); kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); memset(kaddr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + local_irq_restore(flags); flush_dcache_page(page); } } else { @@ -143,11 +145,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); + local_irq_save(flags); kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 8296c29ae3b..74f99a6a369 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1,7 +1,7 @@ /** * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1249,16 +1249,12 @@ skip_index_root: /* Get the offset into the index allocation attribute. */ ia_pos = (s64)fpos - vol->mft_record_size; ia_mapping = vdir->i_mapping; - bmp_vi = ndir->itype.index.bmp_ino; - if (unlikely(!bmp_vi)) { - ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino); - bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); - if (IS_ERR(bmp_vi)) { - ntfs_error(sb, "Failed to get bitmap attribute."); - err = PTR_ERR(bmp_vi); - goto err_out; - } - ndir->itype.index.bmp_ino = bmp_vi; + ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); + bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); + if (IS_ERR(bmp_vi)) { + ntfs_error(sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bmp_vi); + goto err_out; } bmp_mapping = bmp_vi->i_mapping; /* Get the starting bitmap bit position and sanity check it. */ @@ -1266,7 +1262,7 @@ skip_index_root: if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) { ntfs_error(sb, "Current index allocation position exceeds " "index bitmap size."); - goto err_out; + goto iput_err_out; } /* Get the starting bit position in the current bitmap page. */ cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); @@ -1282,7 +1278,7 @@ get_next_bmp_page: ntfs_error(sb, "Reading index bitmap failed."); err = PTR_ERR(bmp_page); bmp_page = NULL; - goto err_out; + goto iput_err_out; } bmp = (u8*)page_address(bmp_page); /* Find next index block in use. */ @@ -1429,6 +1425,7 @@ find_next_index_buffer: /* @ia_page is already unlocked in this case. */ ntfs_unmap_page(ia_page); ntfs_unmap_page(bmp_page); + iput(bmp_vi); goto abort; } } @@ -1439,6 +1436,7 @@ unm_EOD: ntfs_unmap_page(ia_page); } ntfs_unmap_page(bmp_page); + iput(bmp_vi); EOD: /* We are finished, set fpos to EOD. */ fpos = i_size + vol->mft_record_size; @@ -1455,8 +1453,11 @@ done: filp->f_pos = fpos; return 0; err_out: - if (bmp_page) + if (bmp_page) { ntfs_unmap_page(bmp_page); +iput_err_out: + iput(bmp_vi); + } if (ia_page) { unlock_page(ia_page); ntfs_unmap_page(ia_page); @@ -1529,14 +1530,22 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { - struct inode *vi = dentry->d_inode; - ntfs_inode *ni = NTFS_I(vi); + struct inode *bmp_vi, *vi = dentry->d_inode; int err, ret; + ntfs_attr na; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(!S_ISDIR(vi->i_mode)); - if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) - write_inode_now(ni->itype.index.bmp_ino, !datasync); + /* If the bitmap attribute inode is in memory sync it, too. */ + na.mft_no = vi->i_ino; + na.type = AT_BITMAP; + na.name = I30; + na.name_len = 4; + bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na); + if (bmp_vi) { + write_inode_now(bmp_vi, !datasync); + iput(bmp_vi); + } ret = ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 247989891b4..f8bf8da67ee 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,7 +1,7 @@ /** * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -95,7 +95,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. * In that case, @na->name and @na->name_len should be set to NULL and 0, * respectively. Although that is not strictly necessary as - * ntfs_read_inode_locked() will fill them in later. + * ntfs_read_locked_inode() will fill them in later. * * Return 0 on success and -errno on error. * @@ -171,8 +171,8 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; na.mft_no = mft_no; na.type = AT_UNUSED; @@ -229,8 +229,8 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, ntfschar *name, u32 name_len) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; /* Make sure no one calls ntfs_attr_iget() for indices. */ BUG_ON(type == AT_INDEX_ALLOCATION); @@ -287,8 +287,8 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, u32 name_len) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; na.mft_no = base_vi->i_ino; na.type = AT_INDEX_ALLOCATION; @@ -402,7 +402,6 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ntfs_init_runlist(&ni->attr_list_rl); lockdep_set_class(&ni->attr_list_rl.lock, &attr_list_rl_lock_class); - ni->itype.index.bmp_ino = NULL; ni->itype.index.block_size = 0; ni->itype.index.vcn_size = 0; ni->itype.index.collation_rule = 0; @@ -546,6 +545,7 @@ static int ntfs_read_locked_inode(struct inode *vi) { ntfs_volume *vol = NTFS_SB(vi->i_sb); ntfs_inode *ni; + struct inode *bvi; MFT_RECORD *m; ATTR_RECORD *a; STANDARD_INFORMATION *si; @@ -780,7 +780,6 @@ skip_attr_list_load: */ if (S_ISDIR(vi->i_mode)) { loff_t bvi_size; - struct inode *bvi; ntfs_inode *bni; INDEX_ROOT *ir; u8 *ir_end, *index_end; @@ -985,13 +984,12 @@ skip_attr_list_load: err = PTR_ERR(bvi); goto unm_err_out; } - ni->itype.index.bmp_ino = bvi; bni = NTFS_I(bvi); if (NInoCompressed(bni) || NInoEncrypted(bni) || NInoSparse(bni)) { ntfs_error(vi->i_sb, "$BITMAP attribute is compressed " "and/or encrypted and/or sparse."); - goto unm_err_out; + goto iput_unm_err_out; } /* Consistency check bitmap size vs. index allocation size. */ bvi_size = i_size_read(bvi); @@ -1000,8 +998,10 @@ skip_attr_list_load: ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " "for index allocation (0x%llx).", bvi_size << 3, vi->i_size); - goto unm_err_out; + goto iput_unm_err_out; } + /* No longer need the bitmap attribute inode. */ + iput(bvi); skip_large_dir_stuff: /* Setup the operations for this inode. */ vi->i_op = &ntfs_dir_inode_ops; @@ -1176,7 +1176,8 @@ no_data_attr_special_case: vi->i_blocks = ni->allocated_size >> 9; ntfs_debug("Done."); return 0; - +iput_unm_err_out: + iput(bvi); unm_err_out: if (!err) err = -EIO; @@ -1697,7 +1698,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) vi->i_size); goto iput_unm_err_out; } - ni->itype.index.bmp_ino = bvi; + iput(bvi); skip_large_index_stuff: /* Setup the operations for this index inode. */ vi->i_op = NULL; @@ -1714,7 +1715,6 @@ skip_large_index_stuff: ntfs_debug("Done."); return 0; - iput_unm_err_out: iput(bvi); unm_err_out: @@ -2191,37 +2191,6 @@ err_out: return -1; } -/** - * ntfs_put_inode - handler for when the inode reference count is decremented - * @vi: vfs inode - * - * The VFS calls ntfs_put_inode() every time the inode reference count (i_count) - * is about to be decremented (but before the decrement itself. - * - * If the inode @vi is a directory with two references, one of which is being - * dropped, we need to put the attribute inode for the directory index bitmap, - * if it is present, otherwise the directory inode would remain pinned for - * ever. - */ -void ntfs_put_inode(struct inode *vi) -{ - if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) { - ntfs_inode *ni = NTFS_I(vi); - if (NInoIndexAllocPresent(ni)) { - struct inode *bvi = NULL; - mutex_lock(&vi->i_mutex); - if (atomic_read(&vi->i_count) == 2) { - bvi = ni->itype.index.bmp_ino; - if (bvi) - ni->itype.index.bmp_ino = NULL; - } - mutex_unlock(&vi->i_mutex); - if (bvi) - iput(bvi); - } - } -} - static void __ntfs_clear_inode(ntfs_inode *ni) { /* Free all alocated memory. */ @@ -2287,18 +2256,6 @@ void ntfs_clear_big_inode(struct inode *vi) { ntfs_inode *ni = NTFS_I(vi); - /* - * If the inode @vi is an index inode we need to put the attribute - * inode for the index bitmap, if it is present, otherwise the index - * inode would disappear and the attribute inode for the index bitmap - * would no longer be referenced from anywhere and thus it would remain - * pinned for ever. - */ - if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) && - NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) { - iput(ni->itype.index.bmp_ino); - ni->itype.index.bmp_ino = NULL; - } #ifdef NTFS_RW if (NInoDirty(ni)) { bool was_bad = (is_bad_inode(vi)); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index f088291e017..117eaf8032a 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -2,7 +2,7 @@ * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of * the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -101,8 +101,6 @@ struct _ntfs_inode { runlist attr_list_rl; /* Run list for the attribute list value. */ union { struct { /* It is a directory, $MFT, or an index inode. */ - struct inode *bmp_ino; /* Attribute inode for the - index $BITMAP. */ u32 block_size; /* Size of an index block. */ u32 vcn_size; /* Size of a vcn in this index. */ @@ -300,8 +298,6 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni); extern int ntfs_read_inode_mount(struct inode *vi); -extern void ntfs_put_inode(struct inode *vi); - extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); #ifdef NTFS_RW diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 03a391ac714..babf94d90de 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -2702,9 +2702,6 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) static struct super_operations ntfs_sops = { .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ - .put_inode = ntfs_put_inode, /* VFS: Called just before - the inode reference count - is decreased. */ #ifdef NTFS_RW //.dirty_inode = NULL, /* VFS: Called from // __mark_inode_dirty(). */ @@ -3261,7 +3258,7 @@ static void __exit exit_ntfs_fs(void) } MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2006 Anton Altaparmakov"); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2007 Anton Altaparmakov"); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 06be6e774cf..56e1fefc120 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -60,14 +60,11 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); - if (IS_ERR(inode)) { - mlog_errno(PTR_ERR(inode)); + if (IS_ERR(inode)) return (void *)inode; - } if (handle->ih_generation != inode->i_generation) { iput(inode); - mlog_errno(-ESTALE); return ERR_PTR(-ESTALE); } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index e4d91493d7d..28ab56f2b98 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -146,7 +146,6 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) if (is_bad_inode(inode)) { iput(inode); inode = ERR_PTR(-ESTALE); - mlog_errno(PTR_ERR(inode)); goto bail; } @@ -155,8 +154,7 @@ bail: mlog(0, "returning inode with number %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog_exit_ptr(inode); - } else - mlog_errno(PTR_ERR(inode)); + } return inode; } @@ -247,7 +245,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, * today. change if needed. */ if (!OCFS2_IS_VALID_DINODE(fe) || !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " + mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " "signature = %.*s, flags = 0x%x\n", inode->i_ino, (unsigned long long)le64_to_cpu(fe->i_blkno), 7, @@ -478,11 +476,8 @@ static int ocfs2_read_locked_inode(struct inode *inode, S_ISBLK(le16_to_cpu(fe->i_mode))) inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); - if (ocfs2_populate_inode(inode, fe, 0) < 0) { - mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", - (unsigned long long)fe->i_blkno, inode->i_ino); + if (ocfs2_populate_inode(inode, fe, 0) < 0) goto bail; - } BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e1216364d19..d026b4f2775 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -306,8 +306,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, * for the dinode, one for the new block. */ #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) -/* file update (nlink, etc) + dir entry block */ -#define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) +/* file update (nlink, etc) + directory mtime/ctime + dir entry block */ +#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1) /* inode + dir inode (if we unlink a dir), + dir entry block + orphan * dir inode link */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 9637039c263..f3d7803b4b4 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -932,14 +932,15 @@ static int ocfs2_unlink(struct inode *dir, goto leave; } - if (S_ISDIR(inode->i_mode)) { + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + if (S_ISDIR(inode->i_mode)) drop_nlink(dir); - status = ocfs2_mark_inode_dirty(handle, dir, - parent_node_bh); - if (status < 0) { - mlog_errno(status); + + status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh); + if (status < 0) { + mlog_errno(status); + if (S_ISDIR(inode->i_mode)) inc_nlink(dir); - } } leave: @@ -1068,6 +1069,7 @@ static int ocfs2_rename(struct inode *old_dir, char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; struct buffer_head *orphan_entry_bh = NULL; struct buffer_head *newfe_bh = NULL; + struct buffer_head *old_inode_bh = NULL; struct buffer_head *insert_entry_bh = NULL; struct ocfs2_super *osb = NULL; u64 newfe_blkno; @@ -1079,7 +1081,7 @@ static int ocfs2_rename(struct inode *old_dir, struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, // this is the 1st dirent bh - nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; + nlink_t old_dir_nlink = old_dir->i_nlink; /* At some point it might be nice to break this function up a * bit. */ @@ -1139,12 +1141,11 @@ static int ocfs2_rename(struct inode *old_dir, } /* - * Though we don't require an inode meta data update if - * old_inode is not a directory, we lock anyway here to ensure - * the vote thread on other nodes won't have to concurrently - * downconvert the inode and the dentry locks. + * Aside from allowing a meta data update, the locking here + * also ensures that the vote thread on other nodes won't have + * to concurrently downconvert the inode and the dentry locks. */ - status = ocfs2_meta_lock(old_inode, NULL, 1); + status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -1355,6 +1356,7 @@ static int ocfs2_rename(struct inode *old_dir, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); + ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh); /* now that the name has been added to new_dir, remove the old name */ status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); @@ -1384,27 +1386,22 @@ static int ocfs2_rename(struct inode *old_dir, } } mark_inode_dirty(old_dir); - if (new_inode) + ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh); + if (new_inode) { mark_inode_dirty(new_inode); + ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh); + } - if (old_dir != new_dir) - if (new_dir_nlink != new_dir->i_nlink) { - if (!new_dir_bh) { - mlog(ML_ERROR, "need to change nlink for new " - "dir %llu from %d to %d but bh is NULL\n", - (unsigned long long)OCFS2_I(new_dir)->ip_blkno, - (int)new_dir_nlink, new_dir->i_nlink); - } else { - struct ocfs2_dinode *fe; - status = ocfs2_journal_access(handle, - new_dir, - new_dir_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - fe = (struct ocfs2_dinode *) new_dir_bh->b_data; - fe->i_links_count = cpu_to_le16(new_dir->i_nlink); - status = ocfs2_journal_dirty(handle, new_dir_bh); - } - } + if (old_dir != new_dir) { + /* Keep the same times on both directories.*/ + new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime; + + /* + * This will also pick up the i_nlink change from the + * block above. + */ + ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh); + } if (old_dir_nlink != old_dir->i_nlink) { if (!old_dir_bh) { @@ -1455,6 +1452,8 @@ bail: iput(new_inode); if (newfe_bh) brelse(newfe_bh); + if (old_inode_bh) + brelse(old_inode_bh); if (old_dir_bh) brelse(old_dir_bh); if (new_dir_bh) @@ -1826,6 +1825,13 @@ static int __ocfs2_add_entry(handle_t *handle, (le16_to_cpu(de->rec_len) >= rec_len)) || (le16_to_cpu(de->rec_len) >= (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) { + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); + if (retval < 0) { + mlog_errno(retval); + goto bail; + } + status = ocfs2_journal_access(handle, dir, insert_bh, OCFS2_JOURNAL_ACCESS_WRITE); /* By now the buffer is marked for journaling */ @@ -1848,7 +1854,6 @@ static int __ocfs2_add_entry(handle_t *handle, de->name_len = namelen; memcpy(de->name, name, namelen); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; dir->i_version++; status = ocfs2_journal_dirty(handle, insert_bh); retval = 0; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index b5c68567077..e61e218f5e0 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -85,7 +85,7 @@ #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ OCFS2_SB(sb)->s_feature_incompat &= ~(mask) -#define OCFS2_FEATURE_COMPAT_SUPP 0 +#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB #define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT #define OCFS2_FEATURE_RO_COMPAT_SUPP 0 @@ -110,6 +110,20 @@ #define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 /* + * backup superblock flag is used to indicate that this volume + * has backup superblocks. + */ +#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 + +/* The byte offset of the first backup block will be 1G. + * The following will be 4G, 16G, 64G, 256G and 1T. + */ +#define OCFS2_BACKUP_SB_START 1 << 30 + +/* the max backup superblock nums */ +#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6 + +/* * Flags on ocfs2_dinode.i_flags */ #define OCFS2_VALID_FL (0x00000001) /* Inode is valid */ @@ -566,6 +580,20 @@ static inline int ocfs2_truncate_recs_per_inode(struct super_block *sb) return size / sizeof(struct ocfs2_truncate_rec); } + +static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) +{ + u64 offset = OCFS2_BACKUP_SB_START; + + if (index >= 0 && index < OCFS2_MAX_BACKUP_SUPERBLOCKS) { + offset <<= (2 * index); + offset >>= sb->s_blocksize_bits; + return offset; + } + + return 0; + +} #else static inline int ocfs2_fast_symlink_chars(int blocksize) { @@ -631,6 +659,19 @@ static inline int ocfs2_truncate_recs_per_inode(int blocksize) return size / sizeof(struct ocfs2_truncate_rec); } + +static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) +{ + uint64_t offset = OCFS2_BACKUP_SB_START; + + if (index >= 0 && index < OCFS2_MAX_BACKUP_SUPERBLOCKS) { + offset <<= (2 * index); + offset /= blocksize; + return offset; + } + + return 0; +} #endif /* __KERNEL__ */ diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 957d6878b03..03b0191534d 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -158,8 +158,7 @@ static void *ocfs2_follow_link(struct dentry *dentry, } status = vfs_follow_link(nd, link); - if (status && status != -ENOENT) - mlog_errno(status); + bail: if (page) { kunmap(page); diff --git a/fs/proc/base.c b/fs/proc/base.c index 77a57b5799c..1a979ea3b37 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,9 +371,11 @@ static int mounts_open(struct inode *inode, struct file *file) if (task) { task_lock(task); - ns = task->nsproxy->mnt_ns; - if (ns) - get_mnt_ns(ns); + if (task->nsproxy) { + ns = task->nsproxy->mnt_ns; + if (ns) + get_mnt_ns(ns); + } task_unlock(task); put_task_struct(task); } @@ -2326,13 +2328,23 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - struct task_struct *leader = get_proc_task(inode); + struct task_struct *leader = NULL; struct task_struct *task; int retval = -ENOENT; ino_t ino; int tid; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ + task = get_proc_task(inode); + if (!task) + goto out_no_task; + rcu_read_lock(); + if (pid_alive(task)) { + leader = task->group_leader; + get_task_struct(leader); + } + rcu_read_unlock(); + put_task_struct(task); if (!leader) goto out_no_task; retval = 0; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 92ea7743fe8..b37ce33f67e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -47,7 +47,6 @@ #include <linux/vmalloc.h> #include <linux/crash_dump.h> #include <linux/pid_namespace.h> -#include <linux/compile.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -254,12 +253,7 @@ static int version_read_proc(char *page, char **start, off_t off, { int len; - /* FIXED STRING! Don't touch! */ - len = snprintf(page, PAGE_SIZE, - "%s version %s" - " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")" - " (" LINUX_COMPILER ")" - " %s\n", + len = snprintf(page, PAGE_SIZE, linux_proc_banner, utsname()->sysname, utsname()->release, utsname()->version); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 99b6f329ba2..5109f1d5e7f 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -48,6 +48,11 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) } mutex_lock(&inode->i_mutex); + + mutex_lock(&(REISERFS_I(inode)->i_mmap)); + if (REISERFS_I(inode)->i_flags & i_ever_mapped) + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + reiserfs_write_lock(inode->i_sb); /* freeing preallocation only involves relogging blocks that * are already in the current transaction. preallocation gets @@ -100,11 +105,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) err = reiserfs_truncate_file(inode, 0); } out: + mutex_unlock(&(REISERFS_I(inode)->i_mmap)); mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); return err; } +static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct inode *inode; + + inode = file->f_path.dentry->d_inode; + mutex_lock(&(REISERFS_I(inode)->i_mmap)); + REISERFS_I(inode)->i_flags |= i_ever_mapped; + mutex_unlock(&(REISERFS_I(inode)->i_mmap)); + + return generic_file_mmap(file, vma); +} + static void reiserfs_vfs_truncate_file(struct inode *inode) { reiserfs_truncate_file(inode, 1); @@ -1527,7 +1545,7 @@ const struct file_operations reiserfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = reiserfs_file_mmap, .open = generic_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f3d1c4a7797..9fcbfe31697 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1125,6 +1125,7 @@ static void init_inode(struct inode *inode, struct treepath *path) REISERFS_I(inode)->i_prealloc_count = 0; REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; + mutex_init(&(REISERFS_I(inode)->i_mmap)); reiserfs_init_acl_access(inode); reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); @@ -1832,6 +1833,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(inode)->i_attrs = REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); + mutex_init(&(REISERFS_I(inode)->i_mmap)); reiserfs_init_acl_access(inode); reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); diff --git a/fs/super.c b/fs/super.c index f961e030799..3e7458c2bb7 100644 --- a/fs/super.c +++ b/fs/super.c @@ -753,9 +753,9 @@ int get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - mutex_lock(&bdev->bd_mount_mutex); + down(&bdev->bd_mount_sem); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - mutex_unlock(&bdev->bd_mount_mutex); + up(&bdev->bd_mount_sem); if (IS_ERR(s)) goto error_s; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2e0021e8f36..638f4c585e8 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -227,24 +227,27 @@ failed: * We can come here from ufs_writepage or ufs_prepare_write, * locked_page is argument of these functions, so we already lock it. */ -static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, +static void ufs_change_blocknr(struct inode *inode, unsigned int beg, unsigned int count, unsigned int oldb, unsigned int newb, struct page *locked_page) { - unsigned int blk_per_page = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); - struct address_space *mapping = inode->i_mapping; - pgoff_t index, cur_index = locked_page->index; - unsigned int i, j; + const unsigned mask = (1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1; + struct address_space * const mapping = inode->i_mapping; + pgoff_t index, cur_index; + unsigned end, pos, j; struct page *page; struct buffer_head *head, *bh; UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n", inode->i_ino, count, oldb, newb); + BUG_ON(!locked_page); BUG_ON(!PageLocked(locked_page)); - for (i = 0; i < count; i += blk_per_page) { - index = (baseblk+i) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + cur_index = locked_page->index; + + for (end = count + beg; beg < end; beg = (beg | mask) + 1) { + index = beg >> (PAGE_CACHE_SHIFT - inode->i_blkbits); if (likely(cur_index != index)) { page = ufs_get_locked_page(mapping, index); @@ -253,21 +256,32 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, } else page = locked_page; - j = i; head = page_buffers(page); bh = head; + pos = beg & mask; + for (j = 0; j < pos; ++j) + bh = bh->b_this_page; + j = 0; do { - if (likely(bh->b_blocknr == j + oldb && j < count)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - bh->b_blocknr = newb + j++; - mark_buffer_dirty(bh); + if (buffer_mapped(bh)) { + pos = bh->b_blocknr - oldb; + if (pos < count) { + UFSD(" change from %llu to %llu\n", + (unsigned long long)pos + oldb, + (unsigned long long)pos + newb); + bh->b_blocknr = newb + pos; + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + mark_buffer_dirty(bh); + ++j; + } } bh = bh->b_this_page; } while (bh != head); - set_page_dirty(page); + if (j) + set_page_dirty(page); if (likely(cur_index != index)) ufs_put_locked_page(page); @@ -415,14 +429,14 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, } result = ufs_alloc_fragments (inode, cgno, goal, request, err); if (result) { + ufs_clear_frags(inode, result + oldcount, newcount - oldcount, + locked_page != NULL); ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp, result, locked_page); *p = cpu_to_fs32(sb, result); *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); - ufs_clear_frags(inode, result + oldcount, newcount - oldcount, - locked_page != NULL); unlock_super(sb); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 2fbab0aab68..4295ca91cf8 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -242,7 +242,8 @@ repeat: goal = tmp + uspi->s_fpb; tmp = ufs_new_fragments (inode, p, fragment - blockoff, goal, required + blockoff, - err, locked_page); + err, + phys != NULL ? locked_page : NULL); } /* * We will extend last allocated block @@ -250,7 +251,7 @@ repeat: else if (lastblock == block) { tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), - err, locked_page); + err, phys != NULL ? locked_page : NULL); } else /* (lastblock > block) */ { /* * We will allocate new block before last allocated block @@ -261,7 +262,8 @@ repeat: goal = tmp + uspi->s_fpb; } tmp = ufs_new_fragments(inode, p, fragment - blockoff, - goal, uspi->s_fpb, err, locked_page); + goal, uspi->s_fpb, err, + phys != NULL ? locked_page : NULL); } if (!tmp) { if ((!blockoff && *p) || @@ -438,9 +440,11 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head * it much more readable: */ #define GET_INODE_DATABLOCK(x) \ - ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page) + ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new,\ + bh_result->b_page) #define GET_INODE_PTR(x) \ - ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL) + ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL,\ + bh_result->b_page) #define GET_INDIRECT_DATABLOCK(x) \ ufs_inode_getblock(inode, bh, x, fragment, \ &err, &phys, &new, bh_result->b_page) diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index ea11d04c41a..0437b0a6fe9 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -109,10 +109,10 @@ static int ufs_trunc_direct (struct inode * inode) tmp = fs32_to_cpu(sb, *p); if (!tmp ) ufs_panic (sb, "ufs_trunc_direct", "internal error"); + frag2 -= frag1; frag1 = ufs_fragnum (frag1); - frag2 = ufs_fragnum (frag2); - ufs_free_fragments (inode, tmp + frag1, frag2 - frag1); + ufs_free_fragments(inode, tmp + frag1, frag2); mark_inode_dirty(inode); frag_to_free = tmp + frag1; |