From 2ccb48ebb4de139eef4fcefd5f2bb823cb0d81b9 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sun, 30 Jul 2006 03:03:01 -0700 Subject: [PATCH] ext3: avoid triggering ext3_error on bad NFS file handle The inode number out of an NFS file handle gets passed eventually to ext3_get_inode_block() without any checking. If ext3_get_inode_block() allows it to trigger an error, then bad filehandles can have unpleasant effect - ext3_error() will usually cause a forced read-only remount, or a panic if `errors=panic' was used. So remove the call to ext3_error there and put a matching check in ext3/namei.c where inode numbers are read off storage. [akpm@osdl.org: fix off-by-one error] Signed-off-by: Neil Brown Signed-off-by: Jan Kara Cc: Marcel Holtmann Cc: Cc: "Stephen C. Tweedie" Cc: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 13 +++++++------ fs/ext3/namei.c | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f804d5e9d60..ab034d3053e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2402,14 +2402,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, struct buffer_head *bh; struct ext3_group_desc * gdp; - - if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && - ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) || - ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) { - ext3_error(sb, "ext3_get_inode_block", - "bad inode number: %lu", ino); + if (!ext3_valid_inum(sb, ino)) { + /* + * This error is already checked for in namei.c unless we are + * looking at an NFS filehandle, in which case no error + * report is needed + */ return 0; } + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); if (block_group >= EXT3_SB(sb)->s_groups_count) { ext3_error(sb,"ext3_get_inode_block","group >= groups count"); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index d9176dba369..2aa7101b27c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1000,7 +1000,12 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str if (bh) { unsigned long ino = le32_to_cpu(de->inode); brelse (bh); - inode = iget(dir->i_sb, ino); + if (!ext3_valid_inum(dir->i_sb, ino)) { + ext3_error(dir->i_sb, "ext3_lookup", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); @@ -1028,7 +1033,13 @@ struct dentry *ext3_get_parent(struct dentry *child) return ERR_PTR(-ENOENT); ino = le32_to_cpu(de->inode); brelse(bh); - inode = iget(child->d_inode->i_sb, ino); + + if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { + ext3_error(child->d_inode->i_sb, "ext3_get_parent", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(child->d_inode->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); -- cgit v1.2.3-70-g09d2 From d1bbf14f37261c2c0dba71404602e1ddcec069d2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 30 Jul 2006 03:03:16 -0700 Subject: [PATCH] knfsd: Fix stale file handle problem with subtree_checking. A recent commit (7fc90ec93a5eb71f4b08403baf5ba7176b3ec6b1) moved the call to nfsd_setuser out of the 'find a dentry for a filehandle' branch of fh_verify so that it would always be called. This had the unfortunately side-effect of moving *after* the call to decode_fh, so the prober fsuid was not set when nfsd_acceptable was called, the 'permission' check did the wrong thing. This patch moves the nfsd_setuser call back where it was, and add as call in the other branch of the if. Cc: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfsfh.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ecc439d2565..501d8388453 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -187,6 +187,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) goto out; } + /* Set user creds for this exportpoint */ + error = nfserrno(nfsd_setuser(rqstp, exp)); + if (error) + goto out; + /* * Look up the dentry using the NFS file handle. */ @@ -241,16 +246,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify - just checking\n"); dentry = fhp->fh_dentry; exp = fhp->fh_export; + /* Set user creds for this exportpoint; necessary even + * in the "just checking" case because this may be a + * filehandle that was created by fh_compose, and that + * is about to be used in another nfsv4 compound + * operation */ + error = nfserrno(nfsd_setuser(rqstp, exp)); + if (error) + goto out; } cache_get(&exp->h); - /* Set user creds for this exportpoint; necessary even in the "just - * checking" case because this may be a filehandle that was created by - * fh_compose, and that is about to be used in another nfsv4 compound - * operation */ - error = nfserrno(nfsd_setuser(rqstp, exp)); - if (error) - goto out; error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); if (error) -- cgit v1.2.3-70-g09d2 From 0e1dfc66b6ec94984a4778132147a8aa36461d58 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 30 Jul 2006 03:03:28 -0700 Subject: [PATCH] invalidate_bdev() speedup We can immediately bail from invalidate_bdev() if the blockdev has no pagecache. This solves the huge IPI storms which hald is causing on the big ia64 machines when it polls CDROM drives. Acked-by: Jes Sorensen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 3660dcb9759..71649ef9b65 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -470,13 +470,18 @@ out: pass does the actual I/O. */ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) { + struct address_space *mapping = bdev->bd_inode->i_mapping; + + if (mapping->nrpages == 0) + return; + invalidate_bh_lrus(); /* * FIXME: what about destroy_dirty_buffers? * We really want to use invalidate_inode_pages2() for * that, but not until that's cleaned up. */ - invalidate_inode_pages(bdev->bd_inode->i_mapping); + invalidate_inode_pages(mapping); } /* -- cgit v1.2.3-70-g09d2 From cfa224e928f782e1593b5222688fad84c2cad3e8 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sun, 30 Jul 2006 03:03:51 -0700 Subject: [PATCH] enable mac partition label per default on pmac Enable mac partition table support per default also for a powermac config. Signed-off-by: Olaf Hering Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index c9a47809928..e478f194183 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -99,7 +99,7 @@ config IBM_PARTITION config MAC_PARTITION bool "Macintosh partition map support" if PARTITION_ADVANCED - default y if MAC + default y if (MAC || PPC_PMAC) help Say Y here if you would like to use hard disks under Linux which were partitioned on a Macintosh. -- cgit v1.2.3-70-g09d2 From 5b6509aa8c2f292caea7c0602ec361f920951508 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 30 Jul 2006 03:03:54 -0700 Subject: [PATCH] inotify: fix deadlock found by lockdep This is a real deadlock, a nice complex one: (warning: long explanation follows so that Andrew can have a complete patch description) it's an ABCDA deadlock: A iprune_mutex B inode->inotify_mutex C ih->mutex D dev->ev_mutex The AB relationship comes straight from invalidate_inodes() int invalidate_inodes(struct super_block * sb) { int busy; LIST_HEAD(throw_away); mutex_lock(&iprune_mutex); spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); where inotify_umount_inodes() takes the mutex_lock(&inode->inotify_mutex); The BC relationship comes directly from inotify_find_update_watch(): s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, u32 mask) { ... mutex_lock(&inode->inotify_mutex); mutex_lock(&ih->mutex); The CD relationship comes from inotify_rm_wd: inotify_rm_wd does mutex_lock(&inode->inotify_mutex); mutex_lock(&ih->mutex) and then calls inotify_remove_watch_locked() which calls notify_dev_queue_event() which does mutex_lock(&dev->ev_mutex); (this strictly is a BCD relationship) The DA relationship comes from the most interesting part: [] shrink_icache_memory+0x42/0x270 [] shrink_slab+0x11d/0x1c9 [] try_to_free_pages+0x187/0x244 [] __alloc_pages+0x1cd/0x2e0 [] cache_alloc_refill+0x3f8/0x821 [] kmem_cache_alloc+0x85/0xcb [] kernel_event+0x2e/0x122 [] inotify_dev_queue_event+0xcc/0x140 inotify_dev_queue_event schedules a kernel_event which does a kmem_cache_alloc( , GFP_KERNEL) which may try to shrink slabs, including the inode cache .. which then takes iprune_mutex. And voila, there is an AB, a BC, a CD relationship (even a direct BCD), and also now a DA relationship -> a circular type AB-BA deadlock but involving 4 locks. The solution is simple: kernel_event() is NOT allowed to use GFP_KERNEL, but must use GFP_NOFS to not cause recursion into the VFS. Signed-off-by: Arjan van de Ven Acked-by: Ingo Molnar Acked-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/inotify_user.c b/fs/inotify_user.c index f2386442ade..017cb0f134d 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -187,7 +187,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, { struct inotify_kernel_event *kevent; - kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); + kevent = kmem_cache_alloc(event_cachep, GFP_NOFS); if (unlikely(!kevent)) return NULL; -- cgit v1.2.3-70-g09d2 From 6ecbc4e1a395062a8e99e4f5fe328f6ba166d9c8 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 30 Jul 2006 03:03:56 -0700 Subject: [PATCH] Remove incorrect unlock_kernel from allocation failure path in coda_open() Commit 398c53a757702e1e3a7a2c24860c7ad26acb53ed (in the historical GIT tree) moved the lock_kernel() in coda_open after the allocation of a coda_file_info struct, but left an unlock_kernel() in the allocation failure error path; remove it. Signed-off-by: Josh Triplett Acked-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/coda/file.c b/fs/coda/file.c index cc66c681bd1..dbfbcfa5b3c 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -136,10 +136,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) coda_vfs_stat.open++; cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); - if (!cfi) { - unlock_kernel(); + if (!cfi) return -ENOMEM; - } lock_kernel(); -- cgit v1.2.3-70-g09d2 From 0aa9e4f147880b2d7d1eef1f0b45112af0e36f9f Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 30 Jul 2006 03:03:58 -0700 Subject: [PATCH] efs: Remove incorrect unlock_kernel from failure path in efs_symlink_readpage() If efs_symlink_readpage hits the -ENAMETOOLONG error path, it will call unlock_kernel without ever having called lock_kernel(); fix this by creating and jumping to a new label fail_notlocked rather than the fail label used after calling lock_kernel(). Signed-off-by: Josh Triplett Cc: Marcelo Tosatti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/efs/symlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index e249cf733a6..1d30d2ff440 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -22,7 +22,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page) err = -ENAMETOOLONG; if (size > 2 * EFS_BLOCKSIZE) - goto fail; + goto fail_notlocked; lock_kernel(); /* read first 512 bytes of link target */ @@ -47,6 +47,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page) return 0; fail: unlock_kernel(); +fail_notlocked: SetPageError(page); kunmap(page); unlock_page(page); -- cgit v1.2.3-70-g09d2 From 344fe78669d2d1cff9e8939598f6d0d865b6a75b Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 30 Jul 2006 03:03:59 -0700 Subject: [PATCH] ufs: remove incorrect unlock_kernel from failure path in ufs_symlink() ufs_symlink, in one of its error paths, calls unlock_kernel without ever having called lock_kernel(); fix this by creating and jumping to a new label out_notlocked rather than the out label used after calling lock_kernel(). Signed-off-by: Josh Triplett Cc: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index abd5f23a426..d344b411e26 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -129,7 +129,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, struct inode * inode; if (l > sb->s_blocksize) - goto out; + goto out_notlocked; lock_kernel(); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); @@ -155,6 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, err = ufs_add_nondir(dentry, inode); out: unlock_kernel(); +out_notlocked: return err; out_fail: -- cgit v1.2.3-70-g09d2 From 685d16ddb07b74537fb18972784e6214840fdd20 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 30 Jul 2006 03:04:08 -0700 Subject: [PATCH] fuse: fix zero timeout An attribute and entry timeout of zero should mean, that the entity is invalidated immediately after the operation. Previously invalidation only happened at the next clock tick. Reported and tested by Craig Davies. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 72a74cde6de..6db66ec386a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -25,8 +25,11 @@ */ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) { - struct timespec ts = {sec, nsec}; - return jiffies + timespec_to_jiffies(&ts); + if (sec || nsec) { + struct timespec ts = {sec, nsec}; + return jiffies + timespec_to_jiffies(&ts); + } else + return jiffies - 1; } /* -- cgit v1.2.3-70-g09d2 From 0a0898cf413876d4ed6e371f3e04bf38600a9205 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 30 Jul 2006 03:04:10 -0700 Subject: [PATCH] fuse: use jiffies_64 It is entirely possible (though rare) that jiffies half-wraps around, while a dentry/inode remains in the cache. This could mean that the dentry/inode is not invalidated for another half wraparound-time. To get around this problem, use 64-bit jiffies. The only problem with this is that dentry->d_time is 32 bits on 32-bit archs. So use d_fsdata as the high 32 bits. This is an ugly hack, but far simpler, than having to allocate private data just for this purpose. Since 64-bit jiffies can be assumed never to wrap around, simple comparison can be used, and a zero time value can represent "invalid". Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 44 ++++++++++++++++++++++++++++++++++++-------- fs/fuse/fuse_i.h | 2 +- fs/fuse/inode.c | 2 +- 3 files changed, 38 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6db66ec386a..409ce6a7cca 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -14,6 +14,33 @@ #include #include +#if BITS_PER_LONG >= 64 +static inline void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; +} + +static inline u64 fuse_dentry_time(struct dentry *entry) +{ + return entry->d_time; +} +#else +/* + * On 32 bit archs store the high 32 bits of time in d_fsdata + */ +static void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; + entry->d_fsdata = (void *) (unsigned long) (time >> 32); +} + +static u64 fuse_dentry_time(struct dentry *entry) +{ + return (u64) entry->d_time + + ((u64) (unsigned long) entry->d_fsdata << 32); +} +#endif + /* * FUSE caches dentries and attributes with separate timeout. The * time in jiffies until the dentry/attributes are valid is stored in @@ -23,13 +50,13 @@ /* * Calculate the time in jiffies until a dentry/attributes are valid */ -static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) +static u64 time_to_jiffies(unsigned long sec, unsigned long nsec) { if (sec || nsec) { struct timespec ts = {sec, nsec}; - return jiffies + timespec_to_jiffies(&ts); + return get_jiffies_64() + timespec_to_jiffies(&ts); } else - return jiffies - 1; + return 0; } /* @@ -38,7 +65,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) */ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) { - entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); + fuse_dentry_settime(entry, + time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); if (entry->d_inode) get_fuse_inode(entry->d_inode)->i_time = time_to_jiffies(o->attr_valid, o->attr_valid_nsec); @@ -50,7 +78,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) */ void fuse_invalidate_attr(struct inode *inode) { - get_fuse_inode(inode)->i_time = jiffies - 1; + get_fuse_inode(inode)->i_time = 0; } /* @@ -63,7 +91,7 @@ void fuse_invalidate_attr(struct inode *inode) */ static void fuse_invalidate_entry_cache(struct dentry *entry) { - entry->d_time = jiffies - 1; + fuse_dentry_settime(entry, 0); } /* @@ -105,7 +133,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (inode && is_bad_inode(inode)) return 0; - else if (time_after(jiffies, entry->d_time)) { + else if (fuse_dentry_time(entry) < get_jiffies_64()) { int err; struct fuse_entry_out outarg; struct fuse_conn *fc; @@ -669,7 +697,7 @@ static int fuse_revalidate(struct dentry *entry) if (!fuse_allow_task(fc, current)) return -EACCES; if (get_node_id(inode) != FUSE_ROOT_ID && - time_before_eq(jiffies, fi->i_time)) + fi->i_time >= get_jiffies_64()) return 0; return fuse_do_getattr(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dbf9662184..69c7750d55b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -59,7 +59,7 @@ struct fuse_inode { struct fuse_req *forget_req; /** Time in jiffies until the file attributes are valid */ - unsigned long i_time; + u64 i_time; }; /** FUSE specific file data */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index dcaaabd3b9c..7d25092262a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -51,7 +51,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) return NULL; fi = get_fuse_inode(inode); - fi->i_time = jiffies - 1; + fi->i_time = 0; fi->nodeid = 0; fi->nlookup = 0; fi->forget_req = fuse_request_alloc(); -- cgit v1.2.3-70-g09d2 From 873302c71c0e60234eb187b15f83c2d79e84c40a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 30 Jul 2006 03:04:10 -0700 Subject: [PATCH] fuse: fix typo Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/control.c b/fs/fuse/control.c index a3bce3a7725..46fe60b2da2 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -105,7 +105,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, /* * Add a connection to the control filesystem (if it exists). Caller - * must host fuse_mutex + * must hold fuse_mutex */ int fuse_ctl_add_conn(struct fuse_conn *fc) { @@ -139,7 +139,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) /* * Remove a connection from the control filesystem (if it exists). - * Caller must host fuse_mutex + * Caller must hold fuse_mutex */ void fuse_ctl_remove_conn(struct fuse_conn *fc) { -- cgit v1.2.3-70-g09d2 From bc65ac6a0ffc66c56d1e6893685d7fe87c63cc44 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 30 Jul 2006 03:04:12 -0700 Subject: [PATCH] freevxfs: Add missing lock_kernel() to vxfs_readdir Commit 7b2fd697427e73c81d5fa659efd91bd07d303b0e in the historical GIT tree stopped calling the readdir member of a file_operations struct with the big kernel lock held, and fixed up all the readdir functions to do their own locking. However, that change added calls to unlock_kernel() in vxfs_readdir, but no call to lock_kernel(). Fix this by adding a call to lock_kernel(). Signed-off-by: Josh Triplett Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/freevxfs/vxfs_lookup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 29cce456c7c..43886fa00a2 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -246,6 +246,8 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) u_long page, npages, block, pblocks, nblocks, offset; loff_t pos; + lock_kernel(); + switch ((long)fp->f_pos) { case 0: if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) -- cgit v1.2.3-70-g09d2 From 0e31f51d8177320d61ec5786ca4aafa7b7a749b4 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 30 Jul 2006 03:04:14 -0700 Subject: [PATCH] ext3 -nobh option causes oops For files other than IFREG, nobh option doesn't make sense. Modifications to them are journalled and needs buffer heads to do that. Without this patch, we get kernel oops in page_buffers(). Signed-off-by: Badari Pulavarty Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ab034d3053e..c5ee9f0691e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1158,7 +1158,7 @@ retry: ret = PTR_ERR(handle); goto out; } - if (test_opt(inode->i_sb, NOBH)) + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext3_get_block); else ret = block_prepare_write(page, from, to, ext3_get_block); @@ -1244,7 +1244,7 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page, if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH)) + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_commit_write(file, page, from, to); else ret = generic_commit_write(file, page, from, to); @@ -1494,7 +1494,7 @@ static int ext3_writeback_writepage(struct page *page, goto out_fail; } - if (test_opt(inode->i_sb, NOBH)) + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_writepage(page, ext3_get_block, wbc); else ret = block_write_full_page(page, ext3_get_block, wbc); -- cgit v1.2.3-70-g09d2 From 4c90c68aca278f425afc0b48d86298b960fbc0ce Mon Sep 17 00:00:00 2001 From: Russ Ross Date: Sun, 30 Jul 2006 03:04:15 -0700 Subject: [PATCH] 9p: fix marshalling bug in tcreate with empty extension field Signed-off-by: Russ Ross Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/conv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 1e898144eb7..56d88c1a09c 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -673,8 +673,10 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, struct cbuf *bufp = &buffer; size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ - if (extended && extension!=NULL) - size += 2 + strlen(extension); /* extension[s] */ + if (extended) { + size += 2 + /* extension[s] */ + (extension == NULL ? 0 : strlen(extension)); + } fc = v9fs_create_common(bufp, size, TCREATE); if (IS_ERR(fc)) -- cgit v1.2.3-70-g09d2 From 834a9b8ca7a01c34570be021f88e18884a29f048 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Sun, 30 Jul 2006 03:04:16 -0700 Subject: [PATCH] 9p: fix fid behavior on failed remove Based on a bug report from Russ Ross According to the spec: "The remove request asks the file server both to remove the file represented by fid and to clunk the fid, even if the remove fails." but the Linux client seems to expect the fid to be valid after a failed remove attempt. Specifically, I'm getting this behavior when attempting to remove a non-empty directory. Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 2f580a197b8..eae50c9d6dc 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -434,11 +434,11 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) result = v9fs_t_remove(v9ses, fid, &fcall); if (result < 0) { PRINT_FCALL_ERROR("remove fails", fcall); - } else { - v9fs_put_idpool(fid, &v9ses->fidpool); - v9fs_fid_destroy(v9fid); } + v9fs_put_idpool(fid, &v9ses->fidpool); + v9fs_fid_destroy(v9fid); + kfree(fcall); return result; } -- cgit v1.2.3-70-g09d2 From 3e2efce067cec0099f99ae59f28feda99b02b498 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 13 Jul 2006 13:16:02 -0400 Subject: [PATCH] fix faulty inode data collection for open() with O_CREAT When the specified path is an existing file or when it is a symlink, audit collects the wrong inode number, which causes it to miss the open() event. Adding a second hook to the open() path fixes this. Also add audit_copy_inode() to consolidate some code. Signed-off-by: Amy Griffis Signed-off-by: Al Viro --- fs/namei.c | 2 ++ include/linux/audit.h | 7 ++++++ kernel/auditsc.c | 63 +++++++++++++++++++++++++++++++++------------------ 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index e01070d7bf5..47a7bad92d2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1659,6 +1659,7 @@ do_last: * It already exists. */ mutex_unlock(&dir->d_inode->i_mutex); + audit_inode_update(path.dentry->d_inode); error = -EEXIST; if (flag & O_EXCL) @@ -1669,6 +1670,7 @@ do_last: if (flag & O_NOFOLLOW) goto exit_dput; } + error = -ENOENT; if (!path.dentry->d_inode) goto exit_dput; diff --git a/include/linux/audit.h b/include/linux/audit.h index b27d7debc5a..e7e5e534898 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -328,6 +328,7 @@ extern void audit_putname(const char *name); extern void __audit_inode(const char *name, const struct inode *inode); extern void __audit_inode_child(const char *dname, const struct inode *inode, unsigned long pino); +extern void __audit_inode_update(const struct inode *inode); static inline void audit_getname(const char *name) { if (unlikely(current->audit_context)) @@ -343,6 +344,10 @@ static inline void audit_inode_child(const char *dname, if (unlikely(current->audit_context)) __audit_inode_child(dname, inode, pino); } +static inline void audit_inode_update(const struct inode *inode) { + if (unlikely(current->audit_context)) + __audit_inode_update(inode); +} /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); @@ -414,8 +419,10 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) #define audit_putname(n) do { ; } while (0) #define __audit_inode(n,i) do { ; } while (0) #define __audit_inode_child(d,i,p) do { ; } while (0) +#define __audit_inode_update(i) do { ; } while (0) #define audit_inode(n,i) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) +#define audit_inode_update(i) do { ; } while (0) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_obj(i) ({ 0; }) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae40ac8c39e..b939ed2da3e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1199,14 +1199,18 @@ void audit_putname(const char *name) #endif } -static void audit_inode_context(int idx, const struct inode *inode) +/* Copy inode data into an audit_names. */ +static void audit_copy_inode(struct audit_names *name, const struct inode *inode) { - struct audit_context *context = current->audit_context; - - selinux_get_inode_sid(inode, &context->names[idx].osid); + name->ino = inode->i_ino; + name->dev = inode->i_sb->s_dev; + name->mode = inode->i_mode; + name->uid = inode->i_uid; + name->gid = inode->i_gid; + name->rdev = inode->i_rdev; + selinux_get_inode_sid(inode, &name->osid); } - /** * audit_inode - store the inode and device from a lookup * @name: name being audited @@ -1240,13 +1244,7 @@ void __audit_inode(const char *name, const struct inode *inode) ++context->ino_count; #endif } - context->names[idx].ino = inode->i_ino; - context->names[idx].dev = inode->i_sb->s_dev; - context->names[idx].mode = inode->i_mode; - context->names[idx].uid = inode->i_uid; - context->names[idx].gid = inode->i_gid; - context->names[idx].rdev = inode->i_rdev; - audit_inode_context(idx, inode); + audit_copy_inode(&context->names[idx], inode); } /** @@ -1302,16 +1300,37 @@ update_context: context->names[idx].name_len = AUDIT_NAME_FULL; context->names[idx].name_put = 0; /* don't call __putname() */ - if (inode) { - context->names[idx].ino = inode->i_ino; - context->names[idx].dev = inode->i_sb->s_dev; - context->names[idx].mode = inode->i_mode; - context->names[idx].uid = inode->i_uid; - context->names[idx].gid = inode->i_gid; - context->names[idx].rdev = inode->i_rdev; - audit_inode_context(idx, inode); - } else - context->names[idx].ino = (unsigned long)-1; + if (!inode) + context->names[idx].ino = (unsigned long)-1; + else + audit_copy_inode(&context->names[idx], inode); +} + +/** + * audit_inode_update - update inode info for last collected name + * @inode: inode being audited + * + * When open() is called on an existing object with the O_CREAT flag, the inode + * data audit initially collects is incorrect. This additional hook ensures + * audit has the inode data for the actual object to be opened. + */ +void __audit_inode_update(const struct inode *inode) +{ + struct audit_context *context = current->audit_context; + int idx; + + if (!context->in_syscall || !inode) + return; + + if (context->name_count == 0) { + context->name_count++; +#if AUDIT_DEBUG + context->ino_count++; +#endif + } + idx = context->name_count - 1; + + audit_copy_inode(&context->names[idx], inode); } /** -- cgit v1.2.3-70-g09d2 From 73d3ec5abad3f1730ac8530899d2c14d92f3ad63 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 13 Jul 2006 13:16:39 -0400 Subject: [PATCH] fix missed create event for directory audit When an object is created via a symlink into an audited directory, audit misses the event due to not having collected the inode data for the directory. Modify __audit_inode_child() to copy the parent inode data if a parent wasn't found in audit_names[]. Signed-off-by: Amy Griffis Signed-off-by: Al Viro --- fs/namei.c | 2 +- include/linux/audit.h | 8 ++++---- include/linux/fsnotify.h | 6 +++--- kernel/auditsc.c | 16 +++++++++++++--- 4 files changed, 21 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 47a7bad92d2..0ab26cbdacc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1357,7 +1357,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return -ENOENT; BUG_ON(victim->d_parent->d_inode != dir); - audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino); + audit_inode_child(victim->d_name.name, victim->d_inode, dir); error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) diff --git a/include/linux/audit.h b/include/linux/audit.h index e7e5e534898..bf196c05826 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -327,7 +327,7 @@ extern void __audit_getname(const char *name); extern void audit_putname(const char *name); extern void __audit_inode(const char *name, const struct inode *inode); extern void __audit_inode_child(const char *dname, const struct inode *inode, - unsigned long pino); + const struct inode *parent); extern void __audit_inode_update(const struct inode *inode); static inline void audit_getname(const char *name) { @@ -339,10 +339,10 @@ static inline void audit_inode(const char *name, const struct inode *inode) { __audit_inode(name, inode); } static inline void audit_inode_child(const char *dname, - const struct inode *inode, - unsigned long pino) { + const struct inode *inode, + const struct inode *parent) { if (unlikely(current->audit_context)) - __audit_inode_child(dname, inode, pino); + __audit_inode_child(dname, inode, parent); } static inline void audit_inode_update(const struct inode *inode) { if (unlikely(current->audit_context)) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index cc5dec70c32..d4f219ffaa5 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -67,7 +67,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); } - audit_inode_child(new_name, source, new_dir->i_ino); + audit_inode_child(new_name, source, new_dir); } /* @@ -98,7 +98,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); - audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode); } /* @@ -109,7 +109,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, dentry->d_name.name, dentry->d_inode); - audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode); } /* diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b939ed2da3e..b1356fc63b2 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1251,7 +1251,7 @@ void __audit_inode(const char *name, const struct inode *inode) * audit_inode_child - collect inode info for created/removed objects * @dname: inode's dentry name * @inode: inode being audited - * @pino: inode number of dentry parent + * @parent: inode of dentry parent * * For syscalls that create or remove filesystem objects, audit_inode * can only collect information for the filesystem object's parent. @@ -1262,7 +1262,7 @@ void __audit_inode(const char *name, const struct inode *inode) * unsuccessful attempts. */ void __audit_inode_child(const char *dname, const struct inode *inode, - unsigned long pino) + const struct inode *parent) { int idx; struct audit_context *context = current->audit_context; @@ -1276,7 +1276,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode, if (!dname) goto update_context; for (idx = 0; idx < context->name_count; idx++) - if (context->names[idx].ino == pino) { + if (context->names[idx].ino == parent->i_ino) { const char *name = context->names[idx].name; if (!name) @@ -1304,6 +1304,16 @@ update_context: context->names[idx].ino = (unsigned long)-1; else audit_copy_inode(&context->names[idx], inode); + + /* A parent was not found in audit_names, so copy the inode data for the + * provided parent. */ + if (!found_name) { + idx = context->name_count++; +#if AUDIT_DEBUG + context->ino_count++; +#endif + audit_copy_inode(&context->names[idx], parent); + } } /** -- cgit v1.2.3-70-g09d2 From 5ac3a9c26c1cc4861d9cdd8b293fecbfcdc81afe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jul 2006 06:38:45 -0400 Subject: [PATCH] don't bother with aux entires for dummy context Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- include/linux/audit.h | 22 +++++++++++----------- kernel/auditsc.c | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0ab26cbdacc..55a131230f9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -159,7 +159,7 @@ char * getname(const char __user * filename) #ifdef CONFIG_AUDITSYSCALL void putname(const char *name) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) audit_putname(name); else __putname(name); @@ -1125,7 +1125,7 @@ static int fastcall do_path_lookup(int dfd, const char *name, retval = link_path_walk(name, nd); out: if (likely(retval == 0)) { - if (unlikely(current->audit_context && nd && nd->dentry && + if (unlikely(!audit_dummy_context() && nd && nd->dentry && nd->dentry->d_inode)) audit_inode(name, nd->dentry->d_inode); } diff --git a/include/linux/audit.h b/include/linux/audit.h index 3f736d65821..64f9f9e56ac 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -336,21 +336,21 @@ static inline int audit_dummy_context(void) } static inline void audit_getname(const char *name) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) __audit_getname(name); } static inline void audit_inode(const char *name, const struct inode *inode) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) __audit_inode(name, inode); } static inline void audit_inode_child(const char *dname, const struct inode *inode, const struct inode *parent) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) __audit_inode_child(dname, inode, parent); } static inline void audit_inode_update(const struct inode *inode) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) __audit_inode_update(inode); } @@ -375,43 +375,43 @@ extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_ipc_obj(ipcp); return 0; } static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_ipc_set_perm(qbytes, uid, gid, mode); return 0; } static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_mq_open(oflag, mode, u_attr); return 0; } static inline int audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout); return 0; } static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout); return 0; } static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_mq_notify(mqdes, u_notification); return 0; } static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { - if (unlikely(current->audit_context)) + if (unlikely(!audit_dummy_context())) return __audit_mq_getsetattr(mqdes, mqstat); return 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9618d150725..f571c7e925e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1676,7 +1676,7 @@ int audit_bprm(struct linux_binprm *bprm) unsigned long p, next; void *to; - if (likely(!audit_enabled || !context)) + if (likely(!audit_enabled || !context || context->dummy)) return 0; ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, @@ -1714,7 +1714,7 @@ int audit_socketcall(int nargs, unsigned long *args) struct audit_aux_data_socketcall *ax; struct audit_context *context = current->audit_context; - if (likely(!context)) + if (likely(!context || context->dummy)) return 0; ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL); @@ -1742,7 +1742,7 @@ int audit_sockaddr(int len, void *a) struct audit_aux_data_sockaddr *ax; struct audit_context *context = current->audit_context; - if (likely(!context)) + if (likely(!context || context->dummy)) return 0; ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From ce510193272c295b891e45525a83b543ae3207c1 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 24 Jul 2006 16:30:00 -0700 Subject: NFS: Release dcache_lock in an error path of nfs_path In one of the error paths of nfs_path, it may return with dcache_lock still held; fix this by adding and using a new error path Elong_unlock which unlocks dcache_lock. Signed-off-by: Josh Triplett Signed-off-by: Trond Myklebust (cherry picked from f4b90b43677fb23297c56802c3056fc304f988d9 commit) --- fs/nfs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19b98ca468e..86b3169c8ca 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -51,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry, namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) - goto Elong; + goto Elong_unlock; end -= namelen; memcpy(end, dentry->d_name.name, namelen); *--end = '/'; @@ -68,6 +68,8 @@ char *nfs_path(const char *base, const struct dentry *dentry, end -= namelen; memcpy(end, base, namelen); return end; +Elong_unlock: + spin_unlock(&dcache_lock); Elong: return ERR_PTR(-ENAMETOOLONG); } -- cgit v1.2.3-70-g09d2 From e4e20512cfe0bacec0764b4925889d1fa94644f9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 3 Aug 2006 15:07:47 -0400 Subject: NFS: make 2 functions static nfs_writedata_free() and nfs_readdata_free() can now become static. Signed-off-by: Adrian Bunk Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust (cherry picked from 5e1ce40f0c3c8f67591aff17756930d7a18ceb1a commit) --- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_fs.h | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 52bf634260a..65c0c5b3235 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -63,7 +63,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -void nfs_readdata_free(struct nfs_read_data *p) +static void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 86bac6a5008..50774991f8d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -137,7 +137,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -void nfs_writedata_free(struct nfs_write_data *p) +static void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55ea853d57b..247434553ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -476,10 +476,9 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) } /* - * Allocate and free nfs_write_data structures + * Allocate nfs_write_data structures */ extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount); -extern void nfs_writedata_free(struct nfs_write_data *p); /* * linux/fs/nfs/read.c @@ -491,10 +490,9 @@ extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); /* - * Allocate and free nfs_read_data structures + * Allocate nfs_read_data structures */ extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount); -extern void nfs_readdata_free(struct nfs_read_data *p); /* * linux/fs/nfs3proc.c -- cgit v1.2.3-70-g09d2 From f3d43c769d14b7065da7f62ec468b1fcb8cd6e06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Aug 2006 15:07:47 -0400 Subject: NLM/lockd: remove b_done We never actually set the b_done field any more; it's always zero. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust (cherry picked from af8412d4283ef91356e65e0ed9b025b376aebded commit) --- fs/lockd/svclock.c | 12 +++--------- include/linux/lockd/lockd.h | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index baf5ae51348..c9d419703cf 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -638,9 +638,6 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) if (task->tk_status < 0) { /* RPC error: Re-insert for retransmission */ timeout = 10 * HZ; - } else if (block->b_done) { - /* Block already removed, kill it for real */ - timeout = 0; } else { /* Call was successful, now wait for client callback */ timeout = 60 * HZ; @@ -709,13 +706,10 @@ nlmsvc_retry_blocked(void) break; if (time_after(block->b_when,jiffies)) break; - dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", - block, block->b_when, block->b_done); + dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", + block, block->b_when); kref_get(&block->b_count); - if (block->b_done) - nlmsvc_unlink_block(block); - else - nlmsvc_grant_blocked(block); + nlmsvc_grant_blocked(block); nlmsvc_release_block(block); } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index aa4fe905bb4..0d92c468d55 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -123,7 +123,6 @@ struct nlm_block { unsigned int b_id; /* block id */ unsigned char b_queued; /* re-queued */ unsigned char b_granted; /* VFS granted lock */ - unsigned char b_done; /* callback complete */ struct nlm_file * b_file; /* file in question */ }; -- cgit v1.2.3-70-g09d2 From 1fb32b7bd8203d0175649a75ede3ee7634d6a941 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Sat, 5 Aug 2006 12:13:55 -0700 Subject: [PATCH] ufs: ufs_get_locked_page() race fix As discussed earlier: http://lkml.org/lkml/2006/6/28/136 this patch fixes such issue: `ufs_get_locked_page' takes page from cache after that `vmtruncate' takes page and deletes it from cache `ufs_get_locked_page' locks page, and reports about EIO error. Also because of find_lock_page always return valid page or NULL, we have no need to check it if page not NULL. Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/util.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 337cf2c46d1..005d6815adf 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -257,6 +257,7 @@ try_again: page = read_cache_page(mapping, index, (filler_t*)mapping->a_ops->readpage, NULL); + if (IS_ERR(page)) { printk(KERN_ERR "ufs_change_blocknr: " "read_cache_page error: ino %lu, index: %lu\n", @@ -266,6 +267,13 @@ try_again: lock_page(page); + if (unlikely(page->mapping == NULL)) { + /* Truncate got there first */ + unlock_page(page); + page_cache_release(page); + goto try_again; + } + if (!PageUptodate(page) || PageError(page)) { unlock_page(page); page_cache_release(page); @@ -275,15 +283,8 @@ try_again: mapping->host->i_ino, index); page = ERR_PTR(-EIO); - goto out; } } - - if (unlikely(!page->mapping || !page_has_buffers(page))) { - unlock_page(page); - page_cache_release(page); - goto try_again;/*we really need these buffers*/ - } out: return page; } -- cgit v1.2.3-70-g09d2 From 06fa45d3a19c6fbfccbf295e9f08087492338631 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Sat, 5 Aug 2006 12:13:57 -0700 Subject: [PATCH] ufs: handle truncated pages ufs_get_locked_page is called twice in ufs code, one time in ufs_truncate path(we allocated last block), and another time when fragments are reallocated. In ideal world in the second case on allocation/free block layer we should not know that things like `truncate' exists, but now with such crutch like ufs_get_locked_page we can (or should?) skip truncated pages. Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/balloc.c | 2 +- fs/ufs/util.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b01804baa12..b8238147577 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -248,7 +248,7 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, if (likely(cur_index != index)) { page = ufs_get_locked_page(mapping, index); - if (IS_ERR(page)) + if (!page || IS_ERR(page)) /* it was truncated or EIO */ continue; } else page = locked_page; diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 005d6815adf..22f820a9b15 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -251,7 +251,6 @@ struct page *ufs_get_locked_page(struct address_space *mapping, { struct page *page; -try_again: page = find_lock_page(mapping, index); if (!page) { page = read_cache_page(mapping, index, @@ -271,7 +270,8 @@ try_again: /* Truncate got there first */ unlock_page(page); page_cache_release(page); - goto try_again; + page = NULL; + goto out; } if (!PageUptodate(page) || PageError(page)) { -- cgit v1.2.3-70-g09d2 From b0b33dee2dcc85626627919094befc17cfb141e4 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Sat, 5 Aug 2006 12:14:01 -0700 Subject: [PATCH] i_mutex does not need to be locked in reiserfs_delete_inode() Fixes an i_mutex-inside-i_mutex lockdep nasty. Signed-off-by: Alexander Zarochentsev Cc: Cc: Hans Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 12dfdcfbee3..ac57305b1af 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -39,14 +39,10 @@ void reiserfs_delete_inode(struct inode *inode) /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - mutex_lock(&inode->i_mutex); - reiserfs_delete_xattrs(inode); - if (journal_begin(&th, inode->i_sb, jbegin_count)) { - mutex_unlock(&inode->i_mutex); + if (journal_begin(&th, inode->i_sb, jbegin_count)) goto out; - } reiserfs_update_inode_transaction(inode); err = reiserfs_delete_object(&th, inode); @@ -57,12 +53,8 @@ void reiserfs_delete_inode(struct inode *inode) if (!err) DQUOT_FREE_INODE(inode); - if (journal_end(&th, inode->i_sb, jbegin_count)) { - mutex_unlock(&inode->i_mutex); + if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; - } - - mutex_unlock(&inode->i_mutex); /* check return value from reiserfs_delete_object after * ending the transaction -- cgit v1.2.3-70-g09d2 From 94f563c426a78c97fc2a377315995e6ec8343872 Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 5 Aug 2006 12:14:55 -0700 Subject: [PATCH] Fix BeFS slab corruption In bugzilla #6941, Jens Kilian reported: "The function befs_utf2nls (in fs/befs/linuxvfs.c) writes a 0 byte past the end of a block of memory allocated via kmalloc(), leading to memory corruption. This happens only for filenames which are pure ASCII and a multiple of 4 bytes in length. [...] Without DEBUG_SLAB, this leads to further corruption and hard lockups; I believe this is the bug which has made kernels later than 2.6.8 unusable for me. (This must be due to changes in memory management, the bug has been in the BeFS driver since the time it was introduced (AFAICT).) Steps to reproduce: Create a directory (in BeOS, naturally :-) with files named, e.g., "1", "22", "333", "4444", ... Mount it in Linux and do an "ls" or "find"" This patch implements the suggested fix. Credits to Jens Kilian for debugging the problem and finding the right fix. Signed-off-by: Diego Calleja Cc: Jens Kilian Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/befs/linuxvfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index fcaeead9696..50cfca5c7ef 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -512,7 +512,11 @@ befs_utf2nls(struct super_block *sb, const char *in, wchar_t uni; int unilen, utflen; char *result; - int maxlen = in_len; /* The utf8->nls conversion can't make more chars */ + /* The utf8->nls conversion won't make the final nls string bigger + * than the utf one, but if the string is pure ascii they'll have the + * same width and an extra char is needed to save the additional \0 + */ + int maxlen = in_len + 1; befs_debug(sb, "---> utf2nls()"); @@ -588,7 +592,10 @@ befs_nls2utf(struct super_block *sb, const char *in, wchar_t uni; int unilen, utflen; char *result; - int maxlen = 3 * in_len; + /* There're nls characters that will translate to 3-chars-wide UTF-8 + * characters, a additional byte is needed to save the final \0 + * in special cases */ + int maxlen = (3 * in_len) + 1; befs_debug(sb, "---> nls2utf()\n"); -- cgit v1.2.3-70-g09d2 From b5f3953c10b27fcd1c83e199e573b41d8327e22e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 5 Aug 2006 12:15:08 -0700 Subject: [PATCH] fix reiserfs lock inversion of bkl vs inode semaphore The correct lock ordering is inode lock -> BKL Signed-off-by: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/file.c | 2 +- fs/reiserfs/ioctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index f318b58510f..1627edd5081 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -48,8 +48,8 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) return 0; } - reiserfs_write_lock(inode->i_sb); mutex_lock(&inode->i_mutex); + reiserfs_write_lock(inode->i_sb); /* freeing preallocation only involves relogging blocks that * are already in the current transaction. preallocation gets * freed at the end of each transaction, so it is impossible for diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 745c8810089..a986b5e1e28 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -116,12 +116,12 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) if (REISERFS_I(inode)->i_flags & i_nopack_mask) { return 0; } - reiserfs_write_lock(inode->i_sb); /* we need to make sure nobody is changing the file size beneath ** us */ mutex_lock(&inode->i_mutex); + reiserfs_write_lock(inode->i_sb); write_from = inode->i_size & (blocksize - 1); /* if we are on a block boundary, we are already unpacked. */ -- cgit v1.2.3-70-g09d2 From b4c76fa721c7c8a43655a74e508870d21d2e26d3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 5 Aug 2006 12:15:10 -0700 Subject: [PATCH] reiserfs_write_full_page() should not get_block past eof reiserfs_write_full_page does zero bytes in the file past eof, but it may call get_block on those buffers as well. On machines where the page size is larger than the blocksize, this can result in mmaped files incorrectly growing up to a block boundary during writepage. The fix is to avoid calling get_block for any blocks that are entirely past eof Signed-off-by: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ac57305b1af..52f1e213654 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2340,6 +2340,7 @@ static int reiserfs_write_full_page(struct page *page, unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; int error = 0; unsigned long block; + sector_t last_block; struct buffer_head *head, *bh; int partial = 0; int nr = 0; @@ -2387,10 +2388,19 @@ static int reiserfs_write_full_page(struct page *page, } bh = head; block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); + last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; /* first map all the buffers, logging any direct items we find */ do { - if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || - (buffer_mapped(bh) + if (block > last_block) { + /* + * This can happen when the block size is less than + * the page size. The corresponding bytes in the page + * were zero filled above + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + } else if ((checked || buffer_dirty(bh)) && + (!buffer_mapped(bh) || (buffer_mapped(bh) && bh->b_blocknr == 0))) { /* not mapped yet, or it points to a direct item, search -- cgit v1.2.3-70-g09d2 From 225add619624b4877941470f31d297e0151b21be Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 5 Aug 2006 12:15:17 -0700 Subject: [PATCH] udf: initialize parts of inode earlier in create I saw an oops down this path when trying to create a new file on a UDF filesystem which was internally marked as readonly, but mounted rw: udf_create udf_new_inode new_inode alloc_inode udf_alloc_inode udf_new_block returns EIO due to readonlyness iput (on error) udf_put_inode udf_discard_prealloc udf_next_aext udf_current_aext udf_get_fileshortad OOPS the udf_discard_prealloc() path was examining uninitialized fields of the udf inode. udf_discard_prealloc() already has this code to short-circuit the discard path if no extents are preallocated: if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || inode->i_size == UDF_I_LENEXTENTS(inode)) { return; } so if we initialize UDF_I_LENEXTENTS(inode) = 0 earlier in udf_new_inode, we won't try to free the (not) preallocated blocks, since this will match the i_size = 0 set when the inode was initialized. Signed-off-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/udf/ialloc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 3873c672cb4..33323473e3c 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -75,6 +75,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) } *err = -ENOSPC; + UDF_I_UNIQUE(inode) = 0; + UDF_I_LENEXTENTS(inode) = 0; + UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; + UDF_I_NEXT_ALLOC_GOAL(inode) = 0; + UDF_I_STRAT4096(inode) = 0; + block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, start, err); if (*err) @@ -84,11 +90,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) } mutex_lock(&sbi->s_alloc_mutex); - UDF_I_UNIQUE(inode) = 0; - UDF_I_LENEXTENTS(inode) = 0; - UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; - UDF_I_NEXT_ALLOC_GOAL(inode) = 0; - UDF_I_STRAT4096(inode) = 0; if (UDF_SB_LVIDBH(sb)) { struct logicalVolHeaderDesc *lvhd; -- cgit v1.2.3-70-g09d2