diff options
Diffstat (limited to 'fs')
365 files changed, 9290 insertions, 5060 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index bed48fa9652..9ac4ffe9ac7 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -29,10 +29,10 @@ #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/pagemap.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index ddffd8aa902..d93960429c0 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -30,10 +30,10 @@ #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/namei.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 3129688143e..1dd86ee90bc 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -29,7 +29,6 @@ #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/sched.h> #include <linux/inet.h> #include <linux/idr.h> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index c7b67725384..6e7678e4852 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -30,7 +30,6 @@ #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/list.h> #include <asm/uaccess.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b01b0a45793..c76cd8fa3f6 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -30,10 +30,10 @@ #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/namei.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 0ec42f66545..7bdf8b32684 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -31,12 +31,12 @@ 
#include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/pagemap.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/Kconfig b/fs/Kconfig index 8ea7b04c661..0fa0c1193e8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -314,7 +314,7 @@ config REISERFS_CHECK config REISERFS_PROC_INFO bool "Stats in /proc/fs/reiserfs" - depends on REISERFS_FS + depends on REISERFS_FS && PROC_FS help Create under /proc/fs/reiserfs a hierarchy of files, displaying various ReiserFS statistics and internal data at the expense of @@ -724,10 +724,6 @@ config FAT_FS file system and use GNU tar's M option. GNU tar is a program available for Unix and DOS ("man tar" or "info tar"). - It is now also becoming possible to read and write compressed FAT - file systems; read <file:Documentation/filesystems/fat_cvf.txt> for - details. - The FAT support will enlarge your kernel by about 37 KB. If unsure, say Y. diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 74c64409ddb..d4fc6095466 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -38,7 +38,7 @@ config BINFMT_ELF_FDPIC config BINFMT_FLAT tristate "Kernel support for flat binaries" - depends on !MMU || SUPERH + depends on !MMU help Support uClinux FLAT format binaries. 
diff --git a/fs/Makefile b/fs/Makefile index 9edf4112bee..720c29d57a6 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -22,6 +22,10 @@ endif obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o obj-$(CONFIG_EPOLL) += eventpoll.o +obj-$(CONFIG_ANON_INODES) += anon_inodes.o +obj-$(CONFIG_SIGNALFD) += signalfd.o +obj-$(CONFIG_TIMERFD) += timerfd.o +obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o nfsd-$(CONFIG_NFSD) := nfsctl.o diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 30c29650849..de2ed5ca335 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -232,8 +232,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/affs/file.c b/fs/affs/file.c index 4aa8079e71b..c8796906f58 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -628,11 +628,7 @@ static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned return err; } if (to < PAGE_CACHE_SIZE) { - char *kaddr = kmap_atomic(page, KM_USER0); - - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0); if (size > offset + to) { if (size < offset + PAGE_CACHE_SIZE) tmp = size & ~PAGE_CACHE_MASK; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index c5b9d73c084..4609a6c13fe 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -9,7 +9,7 @@ * * (C) 1991 Linus Torvalds - minix filesystem */ - +#include <linux/sched.h> #include "affs.h" extern const struct inode_operations affs_symlink_inode_operations; diff --git a/fs/affs/super.c b/fs/affs/super.c index beff7d21e6e..6d0ebc32153 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -15,6 +15,7 @@ #include <linux/statfs.h> #include 
<linux/parser.h> #include <linux/magic.h> +#include <linux/sched.h> #include "affs.h" extern struct timezone sys_tz; @@ -87,11 +88,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct affs_inode_info *ei = (struct affs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->i_link_lock); - init_MUTEX(&ei->i_ext_lock); - inode_init_once(&ei->vfs_inode); - } + init_MUTEX(&ei->i_link_lock); + init_MUTEX(&ei->i_ext_lock); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cf83e5d6351..73ce561f3ea 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -22,6 +22,7 @@ kafs-objs := \ vlclient.o \ vlocation.o \ vnode.o \ - volume.o + volume.o \ + write.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 52d0752265b..24525794814 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -16,6 +16,9 @@ #define AFS_MAXCELLNAME 64 /* maximum length of a cell name */ #define AFS_MAXVOLNAME 64 /* maximum length of a volume name */ +#define AFSNAMEMAX 256 /* maximum length of a filename plus NUL */ +#define AFSPATHMAX 1024 /* maximum length of a pathname plus NUL */ +#define AFSOPAQUEMAX 1024 /* maximum length of an opaque field */ typedef unsigned afs_volid_t; typedef unsigned afs_vnodeid_t; @@ -143,4 +146,24 @@ struct afs_volsync { time_t creation; /* volume creation time */ }; +/* + * AFS volume status record + */ +struct afs_volume_status { + u32 vid; /* volume ID */ + u32 parent_id; /* parent volume ID */ + u8 online; /* true if volume currently online and available */ + u8 in_service; /* true if volume currently in service */ + u8 blessed; /* same as in_service */ + u8 needs_salvage; /* true if consistency checking required */ + u32 type; /* volume type (afs_voltype_t) */ + u32 min_quota; /* minimum space set aside (blocks) */ + u32 max_quota; /* maximum space this volume may occupy (blocks) */ + u32 blocks_in_use; /* space 
this volume currently occupies (blocks) */ + u32 part_blocks_avail; /* space available in volume's partition */ + u32 part_max_blocks; /* size of volume's partition */ +}; + +#define AFS_BLOCK_SIZE 1024 + #endif /* AFS_H */ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 89e0d1650a7..a18c374ebe0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -18,6 +18,8 @@ enum AFS_FS_Operations { FSFETCHDATA = 130, /* AFS Fetch file data */ FSFETCHSTATUS = 132, /* AFS Fetch file status */ + FSSTOREDATA = 133, /* AFS Store file data */ + FSSTORESTATUS = 135, /* AFS Store file status */ FSREMOVEFILE = 136, /* AFS Remove a file */ FSCREATEFILE = 137, /* AFS Create a file */ FSRENAME = 138, /* AFS Rename or move a file or directory */ @@ -26,9 +28,12 @@ enum AFS_FS_Operations { FSMAKEDIR = 141, /* AFS Create a directory */ FSREMOVEDIR = 142, /* AFS Remove a directory */ FSGIVEUPCALLBACKS = 147, /* AFS Discard callback promises */ - FSGETVOLUMEINFO = 148, /* AFS Get root volume information */ + FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ + FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ FSGETROOTVOLUME = 151, /* AFS Get root volume name */ FSLOOKUP = 161, /* AFS lookup file in directory */ + FSFETCHDATA64 = 65537, /* AFS Fetch file data */ + FSSTOREDATA64 = 65538, /* AFS Store file data */ }; enum AFS_FS_Errors { diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 9bdbf36a9aa..bacf518c6fa 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/circ_buf.h> +#include <linux/sched.h> #include "internal.h" unsigned afs_vnode_update_timeout = 10; @@ -44,7 +45,7 @@ void afs_init_callback_state(struct afs_server *server) while (!RB_EMPTY_ROOT(&server->cb_promises)) { vnode = rb_entry(server->cb_promises.rb_node, struct afs_vnode, cb_promise); - _debug("UNPROMISE { vid=%x vn=%u uq=%u}", + _debug("UNPROMISE { vid=%x:%u uq=%u}", vnode->fid.vid, 
vnode->fid.vnode, vnode->fid.unique); rb_erase(&vnode->cb_promise, &server->cb_promises); vnode->cb_promised = false; @@ -84,11 +85,8 @@ void afs_broken_callback_work(struct work_struct *work) /* if the vnode's data version number changed then its contents * are different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - _debug("zap data {%x:%u}", - vnode->fid.vid, vnode->fid.vnode); - invalidate_remote_inode(&vnode->vfs_inode); - } + if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + afs_zap_data(vnode); } out: diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9b1311a1df5..175a567db78 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/key.h> #include <linux/ctype.h> +#include <linux/sched.h> #include <keys/rxrpc-type.h> #include "internal.h" diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0c1e902f17a..546c59522eb 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -16,6 +16,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/ctype.h> +#include <linux/sched.h> #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, @@ -55,7 +56,8 @@ const struct inode_operations afs_dir_inode_operations = { .rmdir = afs_rmdir, .rename = afs_rename, .permission = afs_permission, - .getattr = afs_inode_getattr, + .getattr = afs_getattr, + .setattr = afs_setattr, }; static struct dentry_operations afs_fs_dentry_operations = { @@ -491,12 +493,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, vnode = AFS_FS_I(dir); - _enter("{%x:%d},%p{%s},", + _enter("{%x:%u},%p{%s},", vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); ASSERTCMP(dentry->d_inode, ==, NULL); - if (dentry->d_name.len > 255) { + if (dentry->d_name.len >= AFSNAMEMAX) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } @@ -731,11 +733,11 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode) dvnode = AFS_FS_I(dir); - 
_enter("{%x:%d},{%s},%o", + _enter("{%x:%u},{%s},%o", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -796,11 +798,11 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s}", + _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -842,11 +844,11 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s}", + _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -916,11 +918,11 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode, dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s},%o,", + _enter("{%x:%u},{%s},%o,", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -983,13 +985,13 @@ static int afs_link(struct dentry *from, struct inode *dir, vnode = AFS_FS_I(from->d_inode); dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%x:%d},{%s}", + _enter("{%x:%u},{%x:%u},{%s}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -1032,16 +1034,16 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s},%s", + 
_enter("{%x:%u},{%s},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, content); ret = -ENAMETOOLONG; - if (dentry->d_name.len > 255) + if (dentry->d_name.len >= AFSNAMEMAX) goto error; ret = -EINVAL; - if (strlen(content) > 1023) + if (strlen(content) >= AFSPATHMAX) goto error; key = afs_request_key(dvnode->volume->cell); @@ -1104,14 +1106,14 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); - _enter("{%x:%d},{%x:%d},{%x:%d},{%s}", + _enter("{%x:%u},{%x:%u},{%x:%u},{%s}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, vnode->fid.vid, vnode->fid.vnode, new_dvnode->fid.vid, new_dvnode->fid.vnode, new_dentry->d_name.name); ret = -ENAMETOOLONG; - if (new_dentry->d_name.len > 255) + if (new_dentry->d_name.len >= AFSNAMEMAX) goto error; key = afs_request_key(orig_dvnode->volume->cell); diff --git a/fs/afs/file.c b/fs/afs/file.c index ae256498f4f..9c0e721d9fc 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -15,32 +15,43 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/writeback.h> #include "internal.h" -static int afs_file_readpage(struct file *file, struct page *page); -static void afs_file_invalidatepage(struct page *page, unsigned long offset); -static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_readpage(struct file *file, struct page *page); +static void afs_invalidatepage(struct page *page, unsigned long offset); +static int afs_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_launder_page(struct page *page); const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, + .aio_write = afs_file_write, .mmap = generic_file_readonly_mmap, .sendfile = generic_file_sendfile, + .fsync = afs_fsync, }; const struct inode_operations 
afs_file_inode_operations = { - .getattr = afs_inode_getattr, + .getattr = afs_getattr, + .setattr = afs_setattr, .permission = afs_permission, }; const struct address_space_operations afs_fs_aops = { - .readpage = afs_file_readpage, - .set_page_dirty = __set_page_dirty_nobuffers, - .releasepage = afs_file_releasepage, - .invalidatepage = afs_file_invalidatepage, + .readpage = afs_readpage, + .set_page_dirty = afs_set_page_dirty, + .launder_page = afs_launder_page, + .releasepage = afs_releasepage, + .invalidatepage = afs_invalidatepage, + .prepare_write = afs_prepare_write, + .commit_write = afs_commit_write, + .writepage = afs_writepage, + .writepages = afs_writepages, }; /* @@ -52,7 +63,7 @@ int afs_open(struct inode *inode, struct file *file) struct key *key; int ret; - _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { @@ -78,7 +89,7 @@ int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); key_put(file->private_data); _leave(" = 0"); @@ -89,10 +100,10 @@ int afs_release(struct inode *inode, struct file *file) * deal with notification that a page was read from the cache */ #ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_read_complete(void *cookie_data, - struct page *page, - void *data, - int error) +static void afs_readpage_read_complete(void *cookie_data, + struct page *page, + void *data, + int error) { _enter("%p,%p,%p,%d", cookie_data, page, data, error); @@ -109,10 +120,10 @@ static void afs_file_readpage_read_complete(void *cookie_data, * deal with notification that a page was written to the cache */ #ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_write_complete(void *cookie_data, - struct page *page, - void *data, - int error) +static void 
afs_readpage_write_complete(void *cookie_data, + struct page *page, + void *data, + int error) { _enter("%p,%p,%p,%d", cookie_data, page, data, error); @@ -121,9 +132,9 @@ static void afs_file_readpage_write_complete(void *cookie_data, #endif /* - * AFS read page from file (or symlink) + * AFS read page from file, directory or symlink */ -static int afs_file_readpage(struct file *file, struct page *page) +static int afs_readpage(struct file *file, struct page *page) { struct afs_vnode *vnode; struct inode *inode; @@ -219,26 +230,9 @@ error: } /* - * get a page cookie for the specified page - */ -#ifdef AFS_CACHING_SUPPORT -int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie) -{ - int ret; - - _enter(""); - ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); - - _leave(" = %d", ret); - return ret; -} -#endif - -/* * invalidate part or all of a page */ -static void afs_file_invalidatepage(struct page *page, unsigned long offset) +static void afs_invalidatepage(struct page *page, unsigned long offset) { int ret = 1; @@ -247,11 +241,6 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset) BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache,page); -#endif - /* We release buffers only if the entire page is being * invalidated. 
* The get_block cached value has been unconditionally @@ -272,25 +261,33 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset) } /* + * write back a dirty page + */ +static int afs_launder_page(struct page *page) +{ + _enter("{%lu}", page->index); + + return 0; +} + +/* * release a page and cleanup its private data */ -static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) +static int afs_releasepage(struct page *page, gfp_t gfp_flags) { - struct cachefs_page *pageio; + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + struct afs_writeback *wb; - _enter("{%lu},%x", page->index, gfp_flags); + _enter("{{%x:%u}[%lu],%lx},%x", + vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, + gfp_flags); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache, page); -#endif - - pageio = (struct cachefs_page *) page_private(page); + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); set_page_private(page, 0); ClearPagePrivate(page); - - kfree(pageio); + afs_put_writeback(wb); } _leave(" = 0"); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index e54e6c2ad34..5dff1308b6f 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -33,8 +33,10 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) */ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, struct afs_file_status *status, - struct afs_vnode *vnode) + struct afs_vnode *vnode, + afs_dataversion_t *store_version) { + afs_dataversion_t expected_version; const __be32 *bp = *_bp; umode_t mode; u64 data_version, size; @@ -101,7 +103,11 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; } - if (status->data_version != data_version) { + expected_version = status->data_version; + if (store_version) + expected_version = *store_version; + + if (expected_version != data_version) 
{ status->data_version = data_version; if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { _debug("vnode modified %llx on {%x:%u}", @@ -110,6 +116,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, set_bit(AFS_VNODE_MODIFIED, &vnode->flags); set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } + } else if (store_version) { + status->data_version = data_version; } } @@ -156,6 +164,67 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp, } /* + * encode the requested attributes into an AFSStoreStatus block + */ +static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr) +{ + __be32 *bp = *_bp; + u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = attr->ia_mtime.tv_sec; + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = attr->ia_uid; + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = attr->ia_gid; + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + *bp++ = htonl(mask); + *bp++ = htonl(mtime); + *bp++ = htonl(owner); + *bp++ = htonl(group); + *bp++ = htonl(mode); + *bp++ = 0; /* segment size */ + *_bp = bp; +} + +/* + * decode an AFSFetchVolumeStatus block + */ +static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, + struct afs_volume_status *vs) +{ + const __be32 *bp = *_bp; + + vs->vid = ntohl(*bp++); + vs->parent_id = ntohl(*bp++); + vs->online = ntohl(*bp++); + vs->in_service = ntohl(*bp++); + vs->blessed = ntohl(*bp++); + vs->needs_salvage = ntohl(*bp++); + vs->type = ntohl(*bp++); + vs->min_quota = ntohl(*bp++); + vs->max_quota = ntohl(*bp++); + vs->blocks_in_use = ntohl(*bp++); + vs->part_blocks_avail = ntohl(*bp++); + vs->part_max_blocks = ntohl(*bp++); + *_bp = bp; +} + +/* * deliver reply data to an FS.FetchStatus */ static int afs_deliver_fs_fetch_status(struct afs_call *call, @@ -175,7 +244,7 @@ static int 
afs_deliver_fs_fetch_status(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); if (call->reply2) xdr_decode_AFSVolSync(&bp, call->reply2); @@ -206,7 +275,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, struct afs_call *call; __be32 *bp; - _enter(",%x,{%x:%d},,", + _enter(",%x,{%x:%u},,", key_serial(key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); @@ -247,9 +316,33 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 0: call->offset = 0; call->unmarshall++; + if (call->operation_ID != FSFETCHDATA64) { + call->unmarshall++; + goto no_msw; + } - /* extract the returned data length */ + /* extract the upper part of the returned data length of an + * FSFETCHDATA64 op (which should always be 0 using this + * client) */ case 1: + _debug("extract data length (MSW)"); + ret = afs_extract_data(call, skb, last, &call->tmp, 4); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->count = ntohl(call->tmp); + _debug("DATA length MSW: %u", call->count); + if (call->count > 0) + return -EBADMSG; + call->offset = 0; + call->unmarshall++; + + no_msw: + /* extract the returned data length */ + case 2: _debug("extract data length"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); switch (ret) { @@ -265,32 +358,27 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->offset = 0; call->unmarshall++; - if (call->count < PAGE_SIZE) { + /* extract the returned data */ + case 3: + _debug("extract data"); + if (call->count > 0) { page = call->reply3; buffer = kmap_atomic(page, KM_USER0); - memset(buffer + PAGE_SIZE - call->count, 0, - call->count); + ret = afs_extract_data(call, skb, last, buffer, + call->count); kunmap_atomic(buffer, 
KM_USER0); - } - - /* extract the returned data */ - case 2: - _debug("extract data"); - page = call->reply3; - buffer = kmap_atomic(page, KM_USER0); - ret = afs_extract_data(call, skb, last, buffer, call->count); - kunmap_atomic(buffer, KM_USER0); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } } call->offset = 0; call->unmarshall++; /* extract the metadata */ - case 3: + case 4: ret = afs_extract_data(call, skb, last, call->buffer, (21 + 3 + 6) * 4); switch (ret) { @@ -300,7 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, } bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); if (call->reply2) xdr_decode_AFSVolSync(&bp, call->reply2); @@ -308,7 +396,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->offset = 0; call->unmarshall++; - case 4: + case 5: _debug("trailer"); if (skb->len != 0) return -EBADMSG; @@ -318,6 +406,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, if (!last) return 0; + if (call->count < PAGE_SIZE) { + _debug("clear"); + page = call->reply3; + buffer = kmap_atomic(page, KM_USER0); + memset(buffer + call->count, 0, PAGE_SIZE - call->count); + kunmap_atomic(buffer, KM_USER0); + } + _leave(" = 0 [done]"); return 0; } @@ -332,6 +428,56 @@ static const struct afs_call_type afs_RXFSFetchData = { .destructor = afs_flat_call_destructor, }; +static const struct afs_call_type afs_RXFSFetchData64 = { + .name = "FS.FetchData64", + .deliver = afs_deliver_fs_fetch_data, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch data from a very large file + */ +static int afs_fs_fetch_data64(struct afs_server *server, + struct key *key, + struct afs_vnode *vnode, + off_t offset, size_t length, + struct page *buffer, + 
const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + ASSERTCMP(length, <, ULONG_MAX); + + call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->reply2 = NULL; /* volsync */ + call->reply3 = buffer; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->operation_ID = FSFETCHDATA64; + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHDATA64); + bp[1] = htonl(vnode->fid.vid); + bp[2] = htonl(vnode->fid.vnode); + bp[3] = htonl(vnode->fid.unique); + bp[4] = htonl(upper_32_bits(offset)); + bp[5] = htonl((u32) offset); + bp[6] = 0; + bp[7] = htonl((u32) length); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + /* * fetch data from a file */ @@ -345,6 +491,10 @@ int afs_fs_fetch_data(struct afs_server *server, struct afs_call *call; __be32 *bp; + if (upper_32_bits(offset) || upper_32_bits(offset + length)) + return afs_fs_fetch_data64(server, key, vnode, offset, length, + buffer, wait_mode); + _enter(""); call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); @@ -357,6 +507,7 @@ int afs_fs_fetch_data(struct afs_server *server, call->reply3 = buffer; call->service_id = FS_SERVICE; call->port = htons(AFS_FS_PORT); + call->operation_ID = FSFETCHDATA; /* marshall the parameters */ bp = call->request; @@ -476,8 +627,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack_raw(&bp, call->reply4); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ @@ -574,7 +725,7 @@ static 
int afs_deliver_fs_remove(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -657,8 +808,8 @@ static int afs_deliver_fs_link(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); - xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -746,8 +897,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -852,9 +1003,10 @@ static int afs_deliver_fs_rename(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode); + xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL); if (new_dvnode != orig_dvnode) - xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode); + xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, + NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -936,3 +1088,663 @@ int afs_fs_rename(struct afs_server *server, return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); } + +/* + * deliver reply data to an 
FS.StoreData + */ +static int afs_deliver_fs_store_data(struct afs_call *call, + struct sk_buff *skb, bool last) +{ + struct afs_vnode *vnode = call->reply; + const __be32 *bp; + + _enter(",,%u", last); + + afs_transfer_reply(call, skb); + if (!last) { + _leave(" = 0 [more]"); + return 0; + } + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, + &call->store_version); + /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + + afs_pages_written_back(vnode, call); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.StoreData operation type + */ +static const struct afs_call_type afs_RXFSStoreData = { + .name = "FS.StoreData", + .deliver = afs_deliver_fs_store_data, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData64 = { + .name = "FS.StoreData64", + .deliver = afs_deliver_fs_store_data, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * store a set of pages to a very large file + */ +static int afs_fs_store_data64(struct afs_server *server, + struct afs_writeback *wb, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to, + loff_t size, loff_t pos, loff_t i_size, + const struct afs_wait_mode *wait_mode) +{ + struct afs_vnode *vnode = wb->vnode; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(&afs_RXFSStoreData64, + (4 + 6 + 3 * 2) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->wb = wb; + call->key = wb->key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->mapping = vnode->vfs_inode.i_mapping; + call->first = first; + call->last = 
last; + call->first_offset = offset; + call->last_to = to; + call->send_pages = true; + call->store_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA64); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + *bp++ = 0; /* mask */ + *bp++ = 0; /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = 0; /* unix mode */ + *bp++ = 0; /* segment size */ + + *bp++ = htonl(pos >> 32); + *bp++ = htonl((u32) pos); + *bp++ = htonl(size >> 32); + *bp++ = htonl((u32) size); + *bp++ = htonl(i_size >> 32); + *bp++ = htonl((u32) i_size); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * store a set of pages + */ +int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to, + const struct afs_wait_mode *wait_mode) +{ + struct afs_vnode *vnode = wb->vnode; + struct afs_call *call; + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + + size = to - offset; + if (first != last) + size += (loff_t)(last - first) << PAGE_SHIFT; + pos = (loff_t)first << PAGE_SHIFT; + pos += offset; + + i_size = i_size_read(&vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long) size, (unsigned long long) pos, + (unsigned long long) i_size); + + if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) + return afs_fs_store_data64(server, wb, first, last, offset, to, + size, pos, i_size, wait_mode); + + call = afs_alloc_flat_call(&afs_RXFSStoreData, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->wb = wb; + call->key = wb->key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->mapping = vnode->vfs_inode.i_mapping; + 
call->first = first; + call->last = last; + call->first_offset = offset; + call->last_to = to; + call->send_pages = true; + call->store_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + *bp++ = 0; /* mask */ + *bp++ = 0; /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = 0; /* unix mode */ + *bp++ = 0; /* segment size */ + + *bp++ = htonl(pos); + *bp++ = htonl(size); + *bp++ = htonl(i_size); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * deliver reply data to an FS.StoreStatus + */ +static int afs_deliver_fs_store_status(struct afs_call *call, + struct sk_buff *skb, bool last) +{ + afs_dataversion_t *store_version; + struct afs_vnode *vnode = call->reply; + const __be32 *bp; + + _enter(",,%u", last); + + afs_transfer_reply(call, skb); + if (!last) { + _leave(" = 0 [more]"); + return 0; + } + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + + /* unmarshall the reply once we've received all of it */ + store_version = NULL; + if (call->operation_ID == FSSTOREDATA) + store_version = &call->store_version; + + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); + /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.StoreStatus operation type + */ +static const struct afs_call_type afs_RXFSStoreStatus = { + .name = "FS.StoreStatus", + .deliver = afs_deliver_fs_store_status, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData_as_Status = { + .name = "FS.StoreData", + .deliver = afs_deliver_fs_store_status, + .abort_to_error = afs_abort_to_error, + .destructor = 
afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData64_as_Status = { + .name = "FS.StoreData64", + .deliver = afs_deliver_fs_store_status, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * set the attributes on a very large file, using FS.StoreData rather than + * FS.StoreStatus so as to alter the file size also + */ +static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, + struct afs_vnode *vnode, struct iattr *attr, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(key), vnode->fid.vid, vnode->fid.vnode); + + ASSERT(attr->ia_valid & ATTR_SIZE); + + call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status, + (4 + 6 + 3 * 2) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->store_version = vnode->status.data_version + 1; + call->operation_ID = FSSTOREDATA; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA64); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + *bp++ = 0; /* position of start of write */ + *bp++ = 0; + *bp++ = 0; /* size of write */ + *bp++ = 0; + *bp++ = htonl(attr->ia_size >> 32); /* new file length */ + *bp++ = htonl((u32) attr->ia_size); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus + * so as to alter the file size also + */ +static int afs_fs_setattr_size(struct afs_server *server, struct key *key, + struct afs_vnode *vnode, struct iattr *attr, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(key), vnode->fid.vid, vnode->fid.vnode); + + 
ASSERT(attr->ia_valid & ATTR_SIZE); + if (attr->ia_size >> 32) + return afs_fs_setattr_size64(server, key, vnode, attr, + wait_mode); + + call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->store_version = vnode->status.data_version + 1; + call->operation_ID = FSSTOREDATA; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + *bp++ = 0; /* position of start of write */ + *bp++ = 0; /* size of write */ + *bp++ = htonl(attr->ia_size); /* new file length */ + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * set the attributes on a file, using FS.StoreData if there's a change in file + * size, and FS.StoreStatus otherwise + */ +int afs_fs_setattr(struct afs_server *server, struct key *key, + struct afs_vnode *vnode, struct iattr *attr, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return afs_fs_setattr_size(server, key, vnode, attr, + wait_mode); + + _enter(",%x,{%x:%u},,", + key_serial(key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(&afs_RXFSStoreStatus, + (4 + 6) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->operation_ID = FSSTORESTATUS; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTORESTATUS); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * 
deliver reply data to an FS.GetVolumeStatus + */ +static int afs_deliver_fs_get_volume_status(struct afs_call *call, + struct sk_buff *skb, bool last) +{ + const __be32 *bp; + char *p; + int ret; + + _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* extract the returned status record */ + case 1: + _debug("extract status"); + ret = afs_extract_data(call, skb, last, call->buffer, + 12 * 4); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + bp = call->buffer; + xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); + call->offset = 0; + call->unmarshall++; + + /* extract the volume name length */ + case 2: + ret = afs_extract_data(call, skb, last, &call->tmp, 4); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->count = ntohl(call->tmp); + _debug("volname length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return -EBADMSG; + call->offset = 0; + call->unmarshall++; + + /* extract the volume name */ + case 3: + _debug("extract volname"); + if (call->count > 0) { + ret = afs_extract_data(call, skb, last, call->reply3, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + } + + p = call->reply3; + p[call->count] = 0; + _debug("volname '%s'", p); + + call->offset = 0; + call->unmarshall++; + + /* extract the volume name padding */ + if ((call->count & 3) == 0) { + call->unmarshall++; + goto no_volname_padding; + } + call->count = 4 - (call->count & 3); + + case 4: + ret = afs_extract_data(call, skb, last, call->buffer, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->offset = 0; + call->unmarshall++; + no_volname_padding: + + /* extract the offline message length */ + case 5: + ret = afs_extract_data(call, skb, last, &call->tmp, 4); + switch (ret) { + case 0: break; + 
case -EAGAIN: return 0; + default: return ret; + } + + call->count = ntohl(call->tmp); + _debug("offline msg length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return -EBADMSG; + call->offset = 0; + call->unmarshall++; + + /* extract the offline message */ + case 6: + _debug("extract offline"); + if (call->count > 0) { + ret = afs_extract_data(call, skb, last, call->reply3, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + } + + p = call->reply3; + p[call->count] = 0; + _debug("offline '%s'", p); + + call->offset = 0; + call->unmarshall++; + + /* extract the offline message padding */ + if ((call->count & 3) == 0) { + call->unmarshall++; + goto no_offline_padding; + } + call->count = 4 - (call->count & 3); + + case 7: + ret = afs_extract_data(call, skb, last, call->buffer, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->offset = 0; + call->unmarshall++; + no_offline_padding: + + /* extract the message of the day length */ + case 8: + ret = afs_extract_data(call, skb, last, &call->tmp, 4); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->count = ntohl(call->tmp); + _debug("motd length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return -EBADMSG; + call->offset = 0; + call->unmarshall++; + + /* extract the message of the day */ + case 9: + _debug("extract motd"); + if (call->count > 0) { + ret = afs_extract_data(call, skb, last, call->reply3, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + } + + p = call->reply3; + p[call->count] = 0; + _debug("motd '%s'", p); + + call->offset = 0; + call->unmarshall++; + + /* extract the message of the day padding */ + if ((call->count & 3) == 0) { + call->unmarshall++; + goto no_motd_padding; + } + call->count = 4 - (call->count & 3); + + case 10: + ret = afs_extract_data(call, skb, last, 
call->buffer, + call->count); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->offset = 0; + call->unmarshall++; + no_motd_padding: + + case 11: + _debug("trailer %d", skb->len); + if (skb->len != 0) + return -EBADMSG; + break; + } + + if (!last) + return 0; + + _leave(" = 0 [done]"); + return 0; +} + +/* + * destroy an FS.GetVolumeStatus call + */ +static void afs_get_volume_status_call_destructor(struct afs_call *call) +{ + kfree(call->reply3); + call->reply3 = NULL; + afs_flat_call_destructor(call); +} + +/* + * FS.GetVolumeStatus operation type + */ +static const struct afs_call_type afs_RXFSGetVolumeStatus = { + .name = "FS.GetVolumeStatus", + .deliver = afs_deliver_fs_get_volume_status, + .abort_to_error = afs_abort_to_error, + .destructor = afs_get_volume_status_call_destructor, +}; + +/* + * fetch the status of a volume + */ +int afs_fs_get_volume_status(struct afs_server *server, + struct key *key, + struct afs_vnode *vnode, + struct afs_volume_status *vs, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + void *tmpbuf; + + _enter(""); + + tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); + if (!call) { + kfree(tmpbuf); + return -ENOMEM; + } + + call->key = key; + call->reply = vnode; + call->reply2 = vs; + call->reply3 = tmpbuf; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSGETVOLUMESTATUS); + bp[1] = htonl(vnode->fid.vid); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c184a4ee599..d196840127c 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/sched.h> #include "internal.h" struct afs_iget_data { @@ 
-125,7 +126,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, struct inode *inode; int ret; - _enter(",{%u,%u,%u},,", fid->vid, fid->vnode, fid->unique); + _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique); as = sb->s_fs_info; data.volume = as->volume; @@ -204,6 +205,23 @@ bad_inode: } /* + * mark the data attached to an inode as obsolete due to a write on the server + * - might also want to ditch all the outstanding writes and dirty pages + */ +void afs_zap_data(struct afs_vnode *vnode) +{ + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + + /* nuke all the non-dirty pages that aren't locked, mapped or being + * written back in a regular file and completely discard the pages in a + * directory or symlink */ + if (S_ISREG(vnode->vfs_inode.i_mode)) + invalidate_remote_inode(&vnode->vfs_inode); + else + invalidate_inode_pages2(vnode->vfs_inode.i_mapping); +} + +/* * validate a vnode/inode * - there are several things we need to check * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, @@ -258,10 +276,8 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) /* if the vnode's data version number changed then its contents are * different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode); - invalidate_remote_inode(&vnode->vfs_inode); - } + if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + afs_zap_data(vnode); clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); @@ -278,7 +294,7 @@ error_unlock: /* * read the attributes of an inode */ -int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, +int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; @@ -301,7 +317,7 @@ void afs_clear_inode(struct inode *inode) vnode = AFS_FS_I(inode); - _enter("{%x:%d.%d} v=%u x=%u t=%u }", + _enter("{%x:%u.%d} v=%u x=%u t=%u }", vnode->fid.vid, 
vnode->fid.vnode, vnode->fid.unique, @@ -323,6 +339,7 @@ void afs_clear_inode(struct inode *inode) vnode->server = NULL; } + ASSERT(list_empty(&vnode->writebacks)); ASSERT(!vnode->cb_promised); #ifdef AFS_CACHING_SUPPORT @@ -339,3 +356,47 @@ void afs_clear_inode(struct inode *inode) _leave(""); } + +/* + * set the attributes of an inode + */ +int afs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct key *key; + int ret; + + _enter("{%x:%u},{n=%s},%x", + vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + attr->ia_valid); + + if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | + ATTR_MTIME))) { + _leave(" = 0 [unsupported]"); + return 0; + } + + /* flush any dirty data outstanding on a regular file */ + if (S_ISREG(vnode->vfs_inode.i_mode)) { + filemap_write_and_wait(vnode->vfs_inode.i_mapping); + afs_writeback_all(vnode); + } + + if (attr->ia_valid & ATTR_FILE) { + key = attr->ia_file->private_data; + } else { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error; + } + } + + ret = afs_vnode_setattr(vnode, key, attr); + if (!(attr->ia_valid & ATTR_FILE)) + key_put(key); + +error: + _leave(" = %d", ret); + return ret; +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d90c158cd93..2c55dd94a1d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -16,11 +16,15 @@ #include <linux/skbuff.h> #include <linux/rxrpc.h> #include <linux/key.h> +#include <linux/workqueue.h> +#include <linux/sched.h> + #include "afs.h" #include "afs_vl.h" #define AFS_CELL_MAX_ADDRS 15 +struct pagevec; struct afs_call; typedef enum { @@ -75,12 +79,15 @@ struct afs_call { struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ void *request; /* request data (first part) */ - void *request2; /* request data (second part) */ + struct address_space *mapping; /* page set */ + struct 
afs_writeback *wb; /* writeback being performed */ void *buffer; /* reply receive buffer */ void *reply; /* reply buffer (first part) */ void *reply2; /* reply buffer (second part) */ void *reply3; /* reply buffer (third part) */ void *reply4; /* reply buffer (fourth part) */ + pgoff_t first; /* first page in mapping to deal with */ + pgoff_t last; /* last page in mapping to deal with */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -97,14 +104,18 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned reply_size; /* current size of reply */ + unsigned first_offset; /* offset into mapping[first] */ + unsigned last_to; /* amount of mapping[last] */ unsigned short offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ + bool send_pages; /* T if data from mapping should be sent */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ + afs_dataversion_t store_version; /* updated version expected from store */ }; struct afs_call_type { @@ -124,6 +135,32 @@ struct afs_call_type { }; /* + * record of an outstanding writeback on a vnode + */ +struct afs_writeback { + struct list_head link; /* link in vnode->writebacks */ + struct work_struct writer; /* work item to perform the writeback */ + struct afs_vnode *vnode; /* vnode to which this write applies */ + struct key *key; /* owner of this write */ + wait_queue_head_t waitq; /* completion and ready wait queue */ + pgoff_t first; /* first page in batch */ + pgoff_t point; /* last page in current store op */ + pgoff_t last; /* last page in batch (inclusive) */ + unsigned 
offset_first; /* offset into first page of start of write */ + unsigned to_last; /* offset into last page of end of write */ + int num_conflicts; /* count of conflicting writes in list */ + int usage; + bool conflicts; /* T if has dependent conflicts */ + enum { + AFS_WBACK_SYNCING, /* synchronisation being performed */ + AFS_WBACK_PENDING, /* write pending */ + AFS_WBACK_CONFLICTING, /* conflicting writes posted */ + AFS_WBACK_WRITING, /* writing back */ + AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ + } state __attribute__((packed)); +}; + +/* * AFS superblock private data * - there's one superblock per volume */ @@ -305,6 +342,7 @@ struct afs_vnode { wait_queue_head_t update_waitq; /* status fetch waitqueue */ int update_cnt; /* number of outstanding ops that will update the * status */ + spinlock_t writeback_lock; /* lock for writebacks */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; #define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ @@ -316,6 +354,8 @@ struct afs_vnode { long acl_order; /* ACL check count (callback break count) */ + struct list_head writebacks; /* alterations in pagecache that need writing */ + /* outstanding callback notification on this file */ struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ @@ -433,10 +473,6 @@ extern const struct file_operations afs_file_operations; extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); -#ifdef AFS_CACHING_SUPPORT -extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **); -#endif - /* * fsclient.c */ @@ -467,6 +503,16 @@ extern int afs_fs_rename(struct afs_server *, struct key *, struct afs_vnode *, const char *, struct afs_vnode *, const char *, const struct afs_wait_mode *); +extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, + pgoff_t, pgoff_t, unsigned, unsigned, + const struct 
afs_wait_mode *); +extern int afs_fs_setattr(struct afs_server *, struct key *, + struct afs_vnode *, struct iattr *, + const struct afs_wait_mode *); +extern int afs_fs_get_volume_status(struct afs_server *, struct key *, + struct afs_vnode *, + struct afs_volume_status *, + const struct afs_wait_mode *); /* * inode.c @@ -474,10 +520,10 @@ extern int afs_fs_rename(struct afs_server *, struct key *, extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_inode_getattr(struct vfsmount *, struct dentry *, - struct kstat *); -extern void afs_zap_permits(struct rcu_head *); +extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int afs_setattr(struct dentry *, struct iattr *); extern void afs_clear_inode(struct inode *); /* @@ -533,6 +579,7 @@ extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, */ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); +extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern int afs_permission(struct inode *, int, struct nameidata *); @@ -629,6 +676,11 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, struct afs_file_status *, struct afs_server **); extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, struct key *, const char *, const char *); +extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, + unsigned, unsigned); +extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); +extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, + struct afs_volume_status *); /* * volume.c @@ -645,6 +697,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct 
afs_vnode *); extern int afs_volume_release_fileserver(struct afs_vnode *, struct afs_server *, int); +/* + * write.c + */ +extern int afs_set_page_dirty(struct page *); +extern void afs_put_writeback(struct afs_writeback *); +extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); +extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); +extern int afs_writepage(struct page *, struct writeback_control *); +extern int afs_writepages(struct address_space *, struct writeback_control *); +extern int afs_write_inode(struct inode *, int); +extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); +extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, + unsigned long, loff_t); +extern int afs_writeback_all(struct afs_vnode *); +extern int afs_fsync(struct file *, struct dentry *, int); + + /*****************************************************************************/ /* * debug tracing @@ -726,6 +795,21 @@ do { \ } \ } while(0) +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ + if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + BUG(); \ + } \ +} while(0) + #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ @@ -758,6 +842,10 @@ do { \ do { \ } while(0) +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ +} while(0) + #define ASSERTIF(C, X) \ do { \ } while(0) diff --git a/fs/afs/main.c b/fs/afs/main.c index 80ec6fd19a7..cd21195bbb2 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -13,6 +13,7 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/completion.h> +#include <linux/sched.h> #include "internal.h" 
MODULE_DESCRIPTION("AFS Client File System"); @@ -149,6 +150,7 @@ error_cache: afs_vlocation_purge(); afs_cell_purge(); afs_proc_cleanup(); + rcu_barrier(); printk(KERN_ERR "kAFS: failed to register: %d\n", ret); return ret; } @@ -176,6 +178,7 @@ static void __exit afs_exit(void) cachefs_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); + rcu_barrier(); } module_exit(afs_exit); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index cdb9792d816..d1a889c4074 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code) { switch (abort_code) { case 13: return -EACCES; + case 27: return -EFBIG; case 30: return -EROFS; case VSALVAGE: return -EIO; case VNOVNODE: return -ENOENT; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 034fcfd4e33..a3684dcc76e 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -36,7 +36,7 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .follow_link = afs_mntpt_follow_link, .readlink = page_readlink, - .getattr = afs_inode_getattr, + .getattr = afs_getattr, }; static LIST_HEAD(afs_vfsmounts); @@ -58,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) char *buf; int ret; - _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); + _enter("{%x:%u,%u}", + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); /* read the contents of the symlink into the pagecache */ page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index d5601f617cd..13df512aea9 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 222c1a3abbb..1b36f45076a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -237,6 +237,70 @@ void afs_flat_call_destructor(struct afs_call *call) } /* + * 
attach the data from a bunch of pages on an inode to a call + */ +int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov) +{ + struct page *pages[8]; + unsigned count, n, loop, offset, to; + pgoff_t first = call->first, last = call->last; + int ret; + + _enter(""); + + offset = call->first_offset; + call->first_offset = 0; + + do { + _debug("attach %lx-%lx", first, last); + + count = last - first + 1; + if (count > ARRAY_SIZE(pages)) + count = ARRAY_SIZE(pages); + n = find_get_pages_contig(call->mapping, first, count, pages); + ASSERTCMP(n, ==, count); + + loop = 0; + do { + msg->msg_flags = 0; + to = PAGE_SIZE; + if (first + loop >= last) + to = call->last_to; + else + msg->msg_flags = MSG_MORE; + iov->iov_base = kmap(pages[loop]) + offset; + iov->iov_len = to - offset; + offset = 0; + + _debug("- range %u-%u%s", + offset, to, msg->msg_flags ? " [more]" : ""); + msg->msg_iov = (struct iovec *) iov; + msg->msg_iovlen = 1; + + /* have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request */ + if (first + loop >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(call->rxcall, msg, + to - offset); + kunmap(pages[loop]); + if (ret < 0) + break; + } while (++loop < count); + first += count; + + for (loop = 0; loop < count; loop++) + put_page(pages[loop]); + if (ret < 0) + break; + } while (first <= last); + + _leave(" = %d", ret); + return ret; +} + +/* * initiate a call */ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, @@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); - _debug("MAKE %p{%s} [%d]", - call, call->type->name, atomic_read(&afs_outstanding_calls)); + _debug("____MAKE %p{%s,%x} [%d]____", + call, call->type->name, key_serial(call->key), + atomic_read(&afs_outstanding_calls)); call->wait_mode = 
wait_mode; INIT_WORK(&call->async_work, afs_process_async_call); @@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = 0; + msg.msg_flags = (call->send_pages ? MSG_MORE : 0); /* have to change the state *before* sending the last packet as RxRPC * might give us the reply before it returns from sending the * request */ - call->state = AFS_CALL_AWAIT_REPLY; + if (!call->send_pages) + call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); if (ret < 0) goto error_do_abort; + if (call->send_pages) { + ret = afs_send_pages(call, &msg, iov); + if (ret < 0) + goto error_do_abort; + } + /* at this point, an async call may no longer exist as it may have * already completed */ return wait_mode->wait(call); diff --git a/fs/afs/security.c b/fs/afs/security.c index f9f424d8045..566fe712c68 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/ctype.h> +#include <linux/sched.h> #include <keys/rxrpc-type.h> #include "internal.h" @@ -109,7 +110,7 @@ void afs_clear_permits(struct afs_vnode *vnode) { struct afs_permits *permits; - _enter("{%x}", vnode->fid.vnode); + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); mutex_lock(&vnode->permits_lock); permits = vnode->permits; @@ -132,7 +133,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) struct afs_vnode *auth_vnode; int count, loop; - _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order); + _enter("{%x:%u},%x,%lx", + vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); auth_vnode = afs_get_auth_inode(vnode, key); if (IS_ERR(auth_vnode)) { @@ -220,7 +222,8 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, bool valid; int loop, ret; - _enter(""); + _enter("{%x:%u},%x", + vnode->fid.vid, 
vnode->fid.vnode, key_serial(key)); auth_vnode = afs_get_auth_inode(vnode, key); if (IS_ERR(auth_vnode)) { @@ -268,9 +271,9 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, _leave(" = %d", ret); return ret; } + *_access = vnode->status.caller_access; } - *_access = vnode->status.caller_access; iput(&auth_vnode->vfs_inode); _leave(" = 0 [access %x]", *_access); return 0; @@ -288,7 +291,7 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd) struct key *key; int ret; - _enter("{{%x:%x},%lx},%x,", + _enter("{{%x:%u},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); key = afs_request_key(vnode->volume->cell); diff --git a/fs/afs/server.c b/fs/afs/server.c index 96bb23b476a..231ae415027 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -252,6 +252,9 @@ static void afs_destroy_server(struct afs_server *server) { _enter("%p", server); + ASSERTIF(server->cb_break_head != server->cb_break_tail, + delayed_work_pending(&server->cb_break_work)); + ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); ASSERTCMP(server->cb_promises.rb_node, ==, NULL); ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); diff --git a/fs/afs/super.c b/fs/afs/super.c index 7030d76155f..2e8496ba120 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -21,22 +21,21 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/parser.h> +#include <linux/statfs.h> +#include <linux/sched.h> #include "internal.h" #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ static void afs_i_init_once(void *foo, struct kmem_cache *cachep, unsigned long flags); - static int afs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); - static struct inode *afs_alloc_inode(struct super_block *sb); - static void afs_put_super(struct super_block *sb); - static void afs_destroy_inode(struct inode *inode); +static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); struct file_system_type 
afs_fs_type = { .owner = THIS_MODULE, @@ -47,9 +46,9 @@ struct file_system_type afs_fs_type = { }; static const struct super_operations afs_super_ops = { - .statfs = simple_statfs, + .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, - .drop_inode = generic_delete_inode, + .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, .umount_begin = afs_umount_begin, @@ -66,7 +65,7 @@ enum { afs_opt_vol, }; -static const match_table_t afs_options_list = { +static match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, @@ -453,15 +452,15 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, { struct afs_vnode *vnode = _vnode; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(vnode, 0, sizeof(*vnode)); - inode_init_once(&vnode->vfs_inode); - init_waitqueue_head(&vnode->update_waitq); - mutex_init(&vnode->permits_lock); - mutex_init(&vnode->validate_lock); - spin_lock_init(&vnode->lock); - INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); - } + memset(vnode, 0, sizeof(*vnode)); + inode_init_once(&vnode->vfs_inode); + init_waitqueue_head(&vnode->update_waitq); + mutex_init(&vnode->permits_lock); + mutex_init(&vnode->validate_lock); + spin_lock_init(&vnode->writeback_lock); + spin_lock_init(&vnode->lock); + INIT_LIST_HEAD(&vnode->writebacks); + INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); } /* @@ -485,6 +484,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->flags = 1 << AFS_VNODE_UNSET; vnode->cb_promised = false; + _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; } @@ -495,7 +495,7 @@ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("{%lu}", inode->i_ino); + _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); @@ -504,3 +504,36 @@ static void afs_destroy_inode(struct inode *inode) 
kmem_cache_free(afs_inode_cachep, vnode); atomic_dec(&afs_count_active_inodes); } + +/* + * return information about an AFS volume + */ +static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct afs_volume_status vs; + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct key *key; + int ret; + + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + + ret = afs_vnode_get_volume_status(vnode, key, &vs); + key_put(key); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } + + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (vs.max_quota == 0) + buf->f_blocks = vs.part_max_blocks; + else + buf->f_blocks = vs.max_quota; + buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; + return 0; +} diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 3370cdb7256..09e3ad0fc7c 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/sched.h> #include "internal.h" unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index a1904ab8426..232c55dc245 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> +#include <linux/sched.h> #include "internal.h" #if 0 @@ -175,24 +176,33 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) { struct afs_server *server; + _enter("{%p}", vnode->server); + set_bit(AFS_VNODE_DELETED, &vnode->flags); server = vnode->server; - if (vnode->cb_promised) { - spin_lock(&server->cb_lock); + if (server) { if (vnode->cb_promised) { - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; + spin_lock(&server->cb_lock); + if (vnode->cb_promised) { + rb_erase(&vnode->cb_promise, + &server->cb_promises); + 
vnode->cb_promised = false; + } + spin_unlock(&server->cb_lock); } - spin_unlock(&server->cb_lock); - } - spin_lock(&vnode->server->fs_lock); - rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes); - spin_unlock(&vnode->server->fs_lock); + spin_lock(&server->fs_lock); + rb_erase(&vnode->server_rb, &server->fs_vnodes); + spin_unlock(&server->fs_lock); - vnode->server = NULL; - afs_put_server(server); + vnode->server = NULL; + afs_put_server(server); + } else { + ASSERT(!vnode->cb_promised); + } + + _leave(""); } /* @@ -225,7 +235,7 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode, */ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) { - _enter("%p,%d", vnode, ret); + _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret); spin_lock(&vnode->lock); @@ -261,7 +271,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, DECLARE_WAITQUEUE(myself, current); - _enter("%s,{%u,%u,%u}", + _enter("%s,{%x:%u.%u}", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); @@ -389,7 +399,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,,,", + _enter("%s{%x:%u.%u},%x,,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -446,7 +456,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s,,", + _enter("%s{%x:%u.%u},%x,%s,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -502,7 +512,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s", + _enter("%s{%x:%u.%u},%x,%s", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -557,7 +567,7 @@ extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, struct afs_server *server; int ret; - 
_enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s", + _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", dvnode->volume->vlocation->vldb.name, dvnode->fid.vid, dvnode->fid.vnode, @@ -628,7 +638,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s,%s,,,", + _enter("%s{%x:%u.%u},%x,%s,%s,,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -687,7 +697,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s", + _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", orig_dvnode->volume->vlocation->vldb.name, orig_dvnode->fid.vid, orig_dvnode->fid.vnode, @@ -753,3 +763,162 @@ no_server: _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); return PTR_ERR(server); } + +/* + * write to a file + */ +int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_server *server; + struct afs_vnode *vnode = wb->vnode; + int ret; + + _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + vnode->volume->vlocation->vldb.name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(wb->key), + first, last, offset, to); + + /* this op will fetch the status */ + spin_lock(&vnode->lock); + vnode->update_cnt++; + spin_unlock(&vnode->lock); + + do { + /* pick a server to query */ + server = afs_volume_pick_fileserver(vnode); + if (IS_ERR(server)) + goto no_server; + + _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + + ret = afs_fs_store_data(server, wb, first, last, offset, to, + &afs_sync_call); + + } while (!afs_volume_release_fileserver(vnode, server, ret)); + + /* adjust the flags */ + if (ret == 0) { + afs_vnode_finalise_status_update(vnode, server); + afs_put_server(server); + } else { + afs_vnode_status_update_failed(vnode, ret); + } + + _leave(" = %d", ret); + return ret; + +no_server: + spin_lock(&vnode->lock); + 
vnode->update_cnt--; + ASSERTCMP(vnode->update_cnt, >=, 0); + spin_unlock(&vnode->lock); + return PTR_ERR(server); +} + +/* + * set the attributes on a file + */ +int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, + struct iattr *attr) +{ + struct afs_server *server; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->vlocation->vldb.name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + /* this op will fetch the status */ + spin_lock(&vnode->lock); + vnode->update_cnt++; + spin_unlock(&vnode->lock); + + do { + /* pick a server to query */ + server = afs_volume_pick_fileserver(vnode); + if (IS_ERR(server)) + goto no_server; + + _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + + ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call); + + } while (!afs_volume_release_fileserver(vnode, server, ret)); + + /* adjust the flags */ + if (ret == 0) { + afs_vnode_finalise_status_update(vnode, server); + afs_put_server(server); + } else { + afs_vnode_status_update_failed(vnode, ret); + } + + _leave(" = %d", ret); + return ret; + +no_server: + spin_lock(&vnode->lock); + vnode->update_cnt--; + ASSERTCMP(vnode->update_cnt, >=, 0); + spin_unlock(&vnode->lock); + return PTR_ERR(server); +} + +/* + * get the status of a volume + */ +int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, + struct afs_volume_status *vs) +{ + struct afs_server *server; + int ret; + + _enter("%s{%x:%u.%u},%x,", + vnode->volume->vlocation->vldb.name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + /* this op will fetch the status */ + spin_lock(&vnode->lock); + vnode->update_cnt++; + spin_unlock(&vnode->lock); + + do { + /* pick a server to query */ + server = afs_volume_pick_fileserver(vnode); + if (IS_ERR(server)) + goto no_server; + + _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + + ret = afs_fs_get_volume_status(server, key, vnode, vs, &afs_sync_call); + + } 
while (!afs_volume_release_fileserver(vnode, server, ret)); + + /* adjust the flags */ + if (ret == 0) { + afs_vnode_finalise_status_update(vnode, server); + afs_put_server(server); + } else { + afs_vnode_status_update_failed(vnode, ret); + } + + _leave(" = %d", ret); + return ret; + +no_server: + spin_lock(&vnode->lock); + vnode->update_cnt--; + ASSERTCMP(vnode->update_cnt, >=, 0); + spin_unlock(&vnode->lock); + return PTR_ERR(server); +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index dd160cada45..8bab0e3437f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/sched.h> #include "internal.h" static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; diff --git a/fs/afs/write.c b/fs/afs/write.c new file mode 100644 index 00000000000..a03b92a0fe1 --- /dev/null +++ b/fs/afs/write.c @@ -0,0 +1,827 @@ +/* handling of writes to regular files and writing back to the server + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/pagevec.h> +#include "internal.h" + +static int afs_write_back_from_locked_page(struct afs_writeback *wb, + struct page *page); + +/* + * mark a page as having been made dirty and thus needing writeback + */ +int afs_set_page_dirty(struct page *page) +{ + _enter(""); + return __set_page_dirty_nobuffers(page); +} + +/* + * unlink a writeback record because its usage has reached zero + * - must be called with the wb->vnode->writeback_lock held + */ +static void afs_unlink_writeback(struct afs_writeback *wb) +{ + struct afs_writeback *front; + struct afs_vnode *vnode = wb->vnode; + + list_del_init(&wb->link); + if (!list_empty(&vnode->writebacks)) { + /* if an fsync rises to the front of the queue then wake it + * up */ + front = list_entry(vnode->writebacks.next, + struct afs_writeback, link); + if (front->state == AFS_WBACK_SYNCING) { + _debug("wake up sync"); + front->state = AFS_WBACK_COMPLETE; + wake_up(&front->waitq); + } + } +} + +/* + * free a writeback record + */ +static void afs_free_writeback(struct afs_writeback *wb) +{ + _enter(""); + key_put(wb->key); + kfree(wb); +} + +/* + * dispose of a reference to a writeback record + */ +void afs_put_writeback(struct afs_writeback *wb) +{ + struct afs_vnode *vnode = wb->vnode; + + _enter("{%d}", wb->usage); + + spin_lock(&vnode->writeback_lock); + if (--wb->usage == 0) + afs_unlink_writeback(wb); + else + wb = NULL; + spin_unlock(&vnode->writeback_lock); + if (wb) + afs_free_writeback(wb); +} + +/* + * partly or wholly fill a page that's under preparation for writing + */ +static int afs_fill_page(struct afs_vnode *vnode, struct key *key, + unsigned start, unsigned len, struct page *page) +{ + int ret; + + _enter(",,%u,%u", start, len); + + ASSERTCMP(start + len, <=, PAGE_SIZE); + + ret = afs_vnode_fetch_data(vnode, key, start, len, page); + if (ret < 0) { + if (ret == -ENOENT) { + 
_debug("got NOENT from server" + " - marking file deleted and stale"); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + } + + _leave(" = %d", ret); + return ret; +} + +/* + * prepare a page for being written to + */ +static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, + struct key *key, unsigned offset, unsigned to) +{ + unsigned eof, tail, start, stop, len; + loff_t i_size, pos; + void *p; + int ret; + + _enter(""); + + if (offset == 0 && to == PAGE_SIZE) + return 0; + + p = kmap_atomic(page, KM_USER0); + + i_size = i_size_read(&vnode->vfs_inode); + pos = (loff_t) page->index << PAGE_SHIFT; + if (pos >= i_size) { + /* partial write, page beyond EOF */ + _debug("beyond"); + if (offset > 0) + memset(p, 0, offset); + if (to < PAGE_SIZE) + memset(p + to, 0, PAGE_SIZE - to); + kunmap_atomic(p, KM_USER0); + return 0; + } + + if (i_size - pos >= PAGE_SIZE) { + /* partial write, page entirely before EOF */ + _debug("before"); + tail = eof = PAGE_SIZE; + } else { + /* partial write, page overlaps EOF */ + eof = i_size - pos; + _debug("overlap %u", eof); + tail = max(eof, to); + if (tail < PAGE_SIZE) + memset(p + tail, 0, PAGE_SIZE - tail); + if (offset > eof) + memset(p + eof, 0, PAGE_SIZE - eof); + } + + kunmap_atomic(p, KM_USER0); + + ret = 0; + if (offset > 0 || eof > to) { + /* need to fill one or two bits that aren't going to be written + * (cover both fillers in one read if there are two) */ + start = (offset > 0) ? 0 : to; + stop = (eof > to) ? 
eof : offset; + len = stop - start; + _debug("wr=%u-%u av=0-%u rd=%u@%u", + offset, to, eof, start, len); + ret = afs_fill_page(vnode, key, start, len, page); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * prepare to perform part of a write to a page + * - the caller holds the page locked, preventing it from being written out or + * modified by anyone else + */ +int afs_prepare_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + struct afs_writeback *candidate, *wb; + struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct key *key = file->private_data; + pgoff_t index; + int ret; + + _enter("{%x:%u},{%lx},%u,%u", + vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + + candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); + if (!candidate) + return -ENOMEM; + candidate->vnode = vnode; + candidate->first = candidate->last = page->index; + candidate->offset_first = offset; + candidate->to_last = to; + candidate->usage = 1; + candidate->state = AFS_WBACK_PENDING; + init_waitqueue_head(&candidate->waitq); + + if (!PageUptodate(page)) { + _debug("not up to date"); + ret = afs_prepare_page(vnode, page, key, offset, to); + if (ret < 0) { + kfree(candidate); + _leave(" = %d [prep]", ret); + return ret; + } + } + +try_again: + index = page->index; + spin_lock(&vnode->writeback_lock); + + /* see if this page is already pending a writeback under a suitable key + * - if so we can just join onto that one */ + wb = (struct afs_writeback *) page_private(page); + if (wb) { + if (wb->key == key && wb->state == AFS_WBACK_PENDING) + goto subsume_in_current_wb; + goto flush_conflicting_wb; + } + + if (index > 0) { + /* see if we can find an already pending writeback that we can + * append this page to */ + list_for_each_entry(wb, &vnode->writebacks, link) { + if (wb->last == index - 1 && wb->key == key && + wb->state == AFS_WBACK_PENDING) + goto append_to_previous_wb; + } + } + + list_add_tail(&candidate->link, 
&vnode->writebacks); + candidate->key = key_get(key); + spin_unlock(&vnode->writeback_lock); + SetPagePrivate(page); + set_page_private(page, (unsigned long) candidate); + _leave(" = 0 [new]"); + return 0; + +subsume_in_current_wb: + _debug("subsume"); + ASSERTRANGE(wb->first, <=, index, <=, wb->last); + if (index == wb->first && offset < wb->offset_first) + wb->offset_first = offset; + if (index == wb->last && to > wb->to_last) + wb->to_last = to; + spin_unlock(&vnode->writeback_lock); + kfree(candidate); + _leave(" = 0 [sub]"); + return 0; + +append_to_previous_wb: + _debug("append into %lx-%lx", wb->first, wb->last); + wb->usage++; + wb->last++; + wb->to_last = to; + spin_unlock(&vnode->writeback_lock); + SetPagePrivate(page); + set_page_private(page, (unsigned long) wb); + kfree(candidate); + _leave(" = 0 [app]"); + return 0; + + /* the page is currently bound to another context, so if it's dirty we + * need to flush it before we can use the new context */ +flush_conflicting_wb: + _debug("flush conflict"); + if (wb->state == AFS_WBACK_PENDING) + wb->state = AFS_WBACK_CONFLICTING; + spin_unlock(&vnode->writeback_lock); + if (PageDirty(page)) { + ret = afs_write_back_from_locked_page(wb, page); + if (ret < 0) { + afs_put_writeback(candidate); + _leave(" = %d", ret); + return ret; + } + } + + /* the page holds a ref on the writeback record */ + afs_put_writeback(wb); + set_page_private(page, 0); + ClearPagePrivate(page); + goto try_again; +} + +/* + * finalise part of a write to a page + */ +int afs_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + loff_t i_size, maybe_i_size; + + _enter("{%x:%u},{%lx},%u,%u", + vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + + maybe_i_size = (loff_t) page->index << PAGE_SHIFT; + maybe_i_size += to; + + i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) { + spin_lock(&vnode->writeback_lock); + 
i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) + i_size_write(&vnode->vfs_inode, maybe_i_size); + spin_unlock(&vnode->writeback_lock); + } + + SetPageUptodate(page); + set_page_dirty(page); + if (PageDirty(page)) + _debug("dirtied"); + + return 0; +} + +/* + * kill all the pages in the given range + */ +static void afs_kill_pages(struct afs_vnode *vnode, bool error, + pgoff_t first, pgoff_t last) +{ + struct pagevec pv; + unsigned count, loop; + + _enter("{%x:%u},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv, 0); + + do { + _debug("kill %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + ClearPageUptodate(pv.pages[loop]); + if (error) + SetPageError(pv.pages[loop]); + end_page_writeback(pv.pages[loop]); + } + + __pagevec_release(&pv); + } while (first < last); + + _leave(""); +} + +/* + * synchronously write back the locked page and any subsequent non-locked dirty + * pages also covered by the same writeback record + */ +static int afs_write_back_from_locked_page(struct afs_writeback *wb, + struct page *primary_page) +{ + struct page *pages[8], *page; + unsigned long count; + unsigned n, offset, to; + pgoff_t start, first, last; + int loop, ret; + + _enter(",%lx", primary_page->index); + + count = 1; + if (!clear_page_dirty_for_io(primary_page)) + BUG(); + if (test_set_page_writeback(primary_page)) + BUG(); + + /* find all consecutive lockable dirty pages, stopping when we find a + * page that is not immediately lockable, is not dirty or is missing, + * or we reach the end of the range */ + start = primary_page->index; + if (start >= wb->last) + goto no_more; + start++; + do { + _debug("more %lx [%lx]", start, count); + n = wb->last - start + 1; + if (n > ARRAY_SIZE(pages)) + n = 
ARRAY_SIZE(pages); + n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, + start, n, pages); + _debug("fgpc %u", n); + if (n == 0) + goto no_more; + if (pages[0]->index != start) { + do { + put_page(pages[--n]); + } while (n > 0); + goto no_more; + } + + for (loop = 0; loop < n; loop++) { + page = pages[loop]; + if (page->index > wb->last) + break; + if (TestSetPageLocked(page)) + break; + if (!PageDirty(page) || + page_private(page) != (unsigned long) wb) { + unlock_page(page); + break; + } + if (!clear_page_dirty_for_io(page)) + BUG(); + if (test_set_page_writeback(page)) + BUG(); + unlock_page(page); + put_page(page); + } + count += loop; + if (loop < n) { + for (; loop < n; loop++) + put_page(pages[loop]); + goto no_more; + } + + start += loop; + } while (start <= wb->last && count < 65536); + +no_more: + /* we now have a contiguous set of dirty pages, each with writeback set + * and the dirty mark cleared; the first page is locked and must remain + * so, all the rest are unlocked */ + first = primary_page->index; + last = first + count - 1; + + offset = (first == wb->first) ? wb->offset_first : 0; + to = (last == wb->last) ? wb->to_last : PAGE_SIZE; + + _debug("write back %lx[%u..] 
to %lx[..%u]", first, offset, last, to); + + ret = afs_vnode_store_data(wb, first, last, offset, to); + if (ret < 0) { + switch (ret) { + case -EDQUOT: + case -ENOSPC: + set_bit(AS_ENOSPC, + &wb->vnode->vfs_inode.i_mapping->flags); + break; + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + afs_kill_pages(wb->vnode, true, first, last); + set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags); + break; + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_kill_pages(wb->vnode, false, first, last); + break; + default: + break; + } + } else { + ret = count; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * write a page back to the server + * - the caller locked the page for us + */ +int afs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = page->mapping->backing_dev_info; + struct afs_writeback *wb; + int ret; + + _enter("{%lx},", page->index); + + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); + + ret = afs_write_back_from_locked_page(wb, page); + unlock_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return 0; + } + + wbc->nr_to_write -= ret; + if (wbc->nonblocking && bdi_write_congested(bdi)) + wbc->encountered_congestion = 1; + + _leave(" = 0"); + return 0; +} + +/* + * write a region of pages back to the server + */ +int afs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t index, pgoff_t end, pgoff_t *_next) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + struct afs_writeback *wb; + struct page *page; + int ret, n; + + _enter(",,%lx,%lx,", index, end); + + do { + n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!n) + break; + + _debug("wback %lx", page->index); + + if (page->index > end) { + *_next = index; + page_cache_release(page); + _leave(" = 0 [%lx]", *_next); 
+ return 0; + } + + /* at this point we hold neither mapping->tree_lock nor lock on + * the page itself: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled back from + * swapper_space to tmpfs file mapping + */ + lock_page(page); + + if (page->mapping != mapping) { + unlock_page(page); + page_cache_release(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || !PageDirty(page)) { + unlock_page(page); + continue; + } + + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); + + spin_lock(&wb->vnode->writeback_lock); + wb->state = AFS_WBACK_WRITING; + spin_unlock(&wb->vnode->writeback_lock); + + ret = afs_write_back_from_locked_page(wb, page); + unlock_page(page); + page_cache_release(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } + + wbc->nr_to_write -= ret; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + break; + } + + cond_resched(); + } while (index < end && wbc->nr_to_write > 0); + + *_next = index; + _leave(" = 0 [%lx]", *_next); + return 0; +} + +/* + * write some of the pending data back to the server + */ +int afs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + pgoff_t start, end, next; + int ret; + + _enter(""); + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + _leave(" = 0 [congest]"); + return 0; + } + + if (wbc->range_cyclic) { + start = mapping->writeback_index; + end = -1; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && + !(wbc->nonblocking && wbc->encountered_congestion)) + ret = afs_writepages_region(mapping, wbc, 0, start, + &next); + mapping->writeback_index = next; + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + end = 
(pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT); + ret = afs_writepages_region(mapping, wbc, 0, end, &next); + if (wbc->nr_to_write > 0) + mapping->writeback_index = next; + } else { + start = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * write an inode back + */ +int afs_write_inode(struct inode *inode, int sync) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; + + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + + ret = 0; + if (sync) { + ret = filemap_fdatawait(inode->i_mapping); + if (ret < 0) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * completion of write to server + */ +void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) +{ + struct afs_writeback *wb = call->wb; + struct pagevec pv; + unsigned count, loop; + pgoff_t first = call->first, last = call->last; + bool free_wb; + + _enter("{%x:%u},{%lx-%lx}", + vnode->fid.vid, vnode->fid.vnode, first, last); + + ASSERT(wb != NULL); + + pagevec_init(&pv, 0); + + do { + _debug("done %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(call->mapping, first, count, + pv.pages); + ASSERTCMP(pv.nr, ==, count); + + spin_lock(&vnode->writeback_lock); + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + end_page_writeback(page); + if (page_private(page) == (unsigned long) wb) { + set_page_private(page, 0); + ClearPagePrivate(page); + wb->usage--; + } + } + free_wb = false; + if (wb->usage == 0) { + afs_unlink_writeback(wb); + free_wb = true; + } + spin_unlock(&vnode->writeback_lock); + first += count; + if (free_wb) { + afs_free_writeback(wb); + wb = NULL; + } + + __pagevec_release(&pv); + } while (first <= last); + + _leave(""); +} + +/* + * write to an AFS file + */ 
+ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct dentry *dentry = iocb->ki_filp->f_path.dentry; + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + ssize_t result; + size_t count = iov_length(iov, nr_segs); + int ret; + + _enter("{%x.%u},{%zu},%lu,", + vnode->fid.vid, vnode->fid.vnode, count, nr_segs); + + if (IS_SWAPFILE(&vnode->vfs_inode)) { + printk(KERN_INFO + "AFS: Attempt to write to active swap file!\n"); + return -EBUSY; + } + + if (!count) + return 0; + + result = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (IS_ERR_VALUE(result)) { + _leave(" = %zd", result); + return result; + } + + /* return error values for O_SYNC and IS_SYNC() */ + if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { + ret = afs_fsync(iocb->ki_filp, dentry, 1); + if (ret < 0) + result = ret; + } + + _leave(" = %zd", result); + return result; +} + +/* + * flush the vnode to the fileserver + */ +int afs_writeback_all(struct afs_vnode *vnode) +{ + struct address_space *mapping = vnode->vfs_inode.i_mapping; + struct writeback_control wbc = { + .bdi = mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_writepages = 1, + .range_cyclic = 1, + }; + int ret; + + _enter(""); + + ret = mapping->a_ops->writepages(mapping, &wbc); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + + _leave(" = %d", ret); + return ret; +} + +/* + * flush any dirty pages for this process, and check for write errors. + * - the return status from this call provides a reliable indication of + * whether any write errors occurred for this process. 
+ */ +int afs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct afs_writeback *wb, *xwb; + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + int ret; + + _enter("{%x:%u},{n=%s},%d", + vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + datasync); + + /* use a writeback record as a marker in the queue - when this reaches + * the front of the queue, all the outstanding writes are either + * completed or rejected */ + wb = kzalloc(sizeof(*wb), GFP_KERNEL); + if (!wb) + return -ENOMEM; + wb->vnode = vnode; + wb->first = 0; + wb->last = -1; + wb->offset_first = 0; + wb->to_last = PAGE_SIZE; + wb->usage = 1; + wb->state = AFS_WBACK_SYNCING; + init_waitqueue_head(&wb->waitq); + + spin_lock(&vnode->writeback_lock); + list_for_each_entry(xwb, &vnode->writebacks, link) { + if (xwb->state == AFS_WBACK_PENDING) + xwb->state = AFS_WBACK_CONFLICTING; + } + list_add_tail(&wb->link, &vnode->writebacks); + spin_unlock(&vnode->writeback_lock); + + /* push all the outstanding writebacks to the server */ + ret = afs_writeback_all(vnode); + if (ret < 0) { + afs_put_writeback(wb); + _leave(" = %d [wb]", ret); + return ret; + } + + /* wait for the preceding writes to actually complete */ + ret = wait_event_interruptible(wb->waitq, + wb->state == AFS_WBACK_COMPLETE || + vnode->writebacks.next == &wb->link); + afs_put_writeback(wb); + _leave(" = %d", ret); + return ret; +} @@ -30,6 +30,7 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> +#include <linux/eventfd.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -346,10 +347,9 @@ void fastcall exit_aio(struct mm_struct *mm) wait_for_all_aios(ctx); /* - * this is an overkill, but ensures we don't leave - * the ctx on the aio_wq + * Ensure we don't leave the ctx on the aio_wq */ - flush_workqueue(aio_wq); + cancel_work_sync(&ctx->wq.work); if (1 != atomic_read(&ctx->users)) printk(KERN_DEBUG @@ -372,7 +372,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) 
BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); - flush_workqueue(aio_wq); + cancel_work_sync(&ctx->wq.work); aio_free_ring(ctx); mmdrop(ctx->mm); ctx->mm = NULL; @@ -418,6 +418,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) req->private = NULL; req->ki_iovec = NULL; INIT_LIST_HEAD(&req->ki_run_list); + req->ki_eventfd = ERR_PTR(-EINVAL); /* Check if the completion queue has enough free space to * accept an event from this io. @@ -459,6 +460,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (!IS_ERR(req->ki_eventfd)) + fput(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -943,6 +946,14 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) return 1; } + /* + * Check if the user asked us to deliver the result through an + * eventfd. The eventfd_signal() function is safe to be called + * from IRQ context. + */ + if (!IS_ERR(iocb->ki_eventfd)) + eventfd_signal(iocb->ki_eventfd, 1); + info = &ctx->ring_info; /* add a completion event to the ring buffer. @@ -1527,8 +1538,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ssize_t ret; /* enforce forwards compatibility on users */ - if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2 || - iocb->aio_reserved3)) { + if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) { pr_debug("EINVAL: io_submit: reserve field set\n"); return -EINVAL; } @@ -1552,6 +1562,19 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, fput(file); return -EAGAIN; } + if (iocb->aio_flags & IOCB_FLAG_RESFD) { + /* + * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an + * instance of the file* now. The file descriptor must be + * an eventfd() fd, and will be signaled for each completed + * event using the eventfd_signal() function. 
+ */ + req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + if (unlikely(IS_ERR(req->ki_eventfd))) { + ret = PTR_ERR(req->ki_eventfd); + goto out_put_req; + } + } req->ki_filp = file; ret = put_user(req->ki_key, &user_iocb->aio_key); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c new file mode 100644 index 00000000000..40fe3a3222e --- /dev/null +++ b/fs/anon_inodes.c @@ -0,0 +1,200 @@ +/* + * fs/anon_inodes.c + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * + * Thanks to Arnd Bergmann for code review and suggestions. + * More changes for Thomas Gleixner suggestions. + * + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/magic.h> +#include <linux/anon_inodes.h> + +#include <asm/uaccess.h> + +static struct vfsmount *anon_inode_mnt __read_mostly; +static struct inode *anon_inode_inode; +static const struct file_operations anon_inode_fops; + +static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, + mnt); +} + +static int anon_inodefs_delete_dentry(struct dentry *dentry) +{ + /* + * We faked vfs to believe the dentry was hashed when we created it. + * Now we restore the flag so that dput() will work correctly. 
+ */ + dentry->d_flags |= DCACHE_UNHASHED; + return 1; +} + +static struct file_system_type anon_inode_fs_type = { + .name = "anon_inodefs", + .get_sb = anon_inodefs_get_sb, + .kill_sb = kill_anon_super, +}; +static struct dentry_operations anon_inodefs_dentry_operations = { + .d_delete = anon_inodefs_delete_dentry, +}; + +/** + * anon_inode_getfd - creates a new file instance by hooking it up to and + * anonymous inode, and a dentry that describe the "class" + * of the file + * + * @pfd: [out] pointer to the file descriptor + * @dpinode: [out] pointer to the inode + * @pfile: [out] pointer to the file struct + * @name: [in] name of the "class" of the new file + * @fops [in] file operations for the new file + * @priv [in] private data for the new file (will be file's private_data) + * + * Creates a new file by hooking it on a single inode. This is useful for files + * that do not need to have a full-fledged inode in order to operate correctly. + * All the files created with anon_inode_getfd() will share a single inode, by + * hence saving memory and avoiding code duplication for the file/inode/dentry + * setup. + */ +int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, + const char *name, const struct file_operations *fops, + void *priv) +{ + struct qstr this; + struct dentry *dentry; + struct inode *inode; + struct file *file; + int error, fd; + + if (IS_ERR(anon_inode_inode)) + return -ENODEV; + file = get_empty_filp(); + if (!file) + return -ENFILE; + + inode = igrab(anon_inode_inode); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto err_put_filp; + } + + error = get_unused_fd(); + if (error < 0) + goto err_iput; + fd = error; + + /* + * Link the inode to a directory entry by creating a unique name + * using the inode sequence number. 
+ */ + error = -ENOMEM; + this.name = name; + this.len = strlen(name); + this.hash = 0; + dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + if (!dentry) + goto err_put_unused_fd; + dentry->d_op = &anon_inodefs_dentry_operations; + /* Do not publish this dentry inside the global dentry hash table */ + dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(dentry, inode); + + file->f_path.mnt = mntget(anon_inode_mnt); + file->f_path.dentry = dentry; + file->f_mapping = inode->i_mapping; + + file->f_pos = 0; + file->f_flags = O_RDWR; + file->f_op = fops; + file->f_mode = FMODE_READ | FMODE_WRITE; + file->f_version = 0; + file->private_data = priv; + + fd_install(fd, file); + + *pfd = fd; + *pinode = inode; + *pfile = file; + return 0; + +err_put_unused_fd: + put_unused_fd(fd); +err_iput: + iput(inode); +err_put_filp: + put_filp(file); + return error; +} + +/* + * A single inode exist for all anon_inode files. Contrary to pipes, + * anon_inode inodes has no per-instance data associated, so we can avoid + * the allocation of multiple of them. + */ +static struct inode *anon_inode_mkinode(void) +{ + struct inode *inode = new_inode(anon_inode_mnt->mnt_sb); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_fop = &anon_inode_fops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. 
+ */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} + +static int __init anon_inode_init(void) +{ + int error; + + error = register_filesystem(&anon_inode_fs_type); + if (error) + goto err_exit; + anon_inode_mnt = kern_mount(&anon_inode_fs_type); + if (IS_ERR(anon_inode_mnt)) { + error = PTR_ERR(anon_inode_mnt); + goto err_unregister_filesystem; + } + anon_inode_inode = anon_inode_mkinode(); + if (IS_ERR(anon_inode_inode)) { + error = PTR_ERR(anon_inode_inode); + goto err_mntput; + } + + return 0; + +err_mntput: + mntput(anon_inode_mnt); +err_unregister_filesystem: + unregister_filesystem(&anon_inode_fs_type); +err_exit: + panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); +} + +fs_initcall(anon_inode_init); + diff --git a/fs/attr.c b/fs/attr.c index 97de9467087..a0a0c7b07ba 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -9,7 +9,6 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/capability.h> #include <linux/fsnotify.h> #include <linux/fcntl.h> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 4ef544434b5..8b4cca3c470 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -101,7 +101,7 @@ struct autofs_symlink { struct autofs_sb_info { u32 magic; struct file *pipe; - pid_t oz_pgrp; + struct pid *oz_pgrp; int catatonic; struct super_block *sb; unsigned long exp_timeout; @@ -122,7 +122,7 @@ static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) filesystem without "magic".) 
*/ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || process_group(current) == sbi->oz_pgrp; + return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; } /* Hash operations */ diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index aa0b61ff827..e7204d71acc 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -34,12 +34,14 @@ void autofs_kill_sb(struct super_block *sb) if (!sbi) goto out_kill_sb; - if ( !sbi->catatonic ) + if (!sbi->catatonic) autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ + put_pid(sbi->oz_pgrp); + autofs_hash_nuke(sbi); - for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { - if ( test_bit(n, sbi->symlink_bitmap) ) + for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) { + if (test_bit(n, sbi->symlink_bitmap)) kfree(sbi->symlink[n].data); } @@ -69,7 +71,8 @@ static match_table_t autofs_tokens = { {Opt_err, NULL} }; -static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, pid_t *pgrp, int *minproto, int *maxproto) +static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, + pid_t *pgrp, int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -138,9 +141,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) int pipefd; struct autofs_sb_info *sbi; int minproto, maxproto; + pid_t pgid; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if ( !sbi ) + if (!sbi) goto fail_unlock; DPRINTK(("autofs: starting up, sbi = %p\n",sbi)); @@ -149,7 +153,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) sbi->pipe = NULL; sbi->catatonic = 1; sbi->exp_timeout = 0; - sbi->oz_pgrp = process_group(current); autofs_initialize_hash(&sbi->dirhash); sbi->queues = NULL; memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN); @@ -169,26 +172,36 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; /* Can this call block? - WTF cares? s is locked. 
*/ - if ( parse_options(data,&pipefd,&root_inode->i_uid,&root_inode->i_gid,&sbi->oz_pgrp,&minproto,&maxproto) ) { + if (parse_options(data, &pipefd, &root_inode->i_uid, + &root_inode->i_gid, &pgid, &minproto, + &maxproto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } /* Couldn't this be tested earlier? */ - if ( minproto > AUTOFS_PROTO_VERSION || - maxproto < AUTOFS_PROTO_VERSION ) { + if (minproto > AUTOFS_PROTO_VERSION || + maxproto < AUTOFS_PROTO_VERSION) { printk("autofs: kernel does not match daemon version\n"); goto fail_dput; } - DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, sbi->oz_pgrp)); + DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid)); + sbi->oz_pgrp = find_get_pid(pgid); + + if (!sbi->oz_pgrp) { + printk("autofs: could not find process group %d\n", pgid); + goto fail_dput; + } + pipe = fget(pipefd); - if ( !pipe ) { + if (!pipe) { printk("autofs: could not open pipe file descriptor\n"); - goto fail_dput; + goto fail_put_pid; } - if ( !pipe->f_op || !pipe->f_op->write ) + + if (!pipe->f_op || !pipe->f_op->write) goto fail_fput; sbi->pipe = pipe; sbi->catatonic = 0; @@ -202,6 +215,8 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) fail_fput: printk("autofs: pipe file descriptor does not contain proper ops\n"); fput(pipe); +fail_put_pid: + put_pid(sbi->oz_pgrp); fail_dput: dput(root); goto fail_free; @@ -230,7 +245,7 @@ static void autofs_read_inode(struct inode *inode) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; - if ( ino == AUTOFS_ROOT_INO ) { + if (ino == AUTOFS_ROOT_INO) { inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &autofs_root_inode_operations; inode->i_fop = &autofs_root_operations; @@ -241,12 +256,12 @@ static void autofs_read_inode(struct inode *inode) inode->i_uid = inode->i_sb->s_root->d_inode->i_uid; inode->i_gid = inode->i_sb->s_root->d_inode->i_gid; - if ( ino >= AUTOFS_FIRST_SYMLINK && ino < 
AUTOFS_FIRST_DIR_INO ) { + if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) { /* Symlink inode - should be in symlink list */ struct autofs_symlink *sl; n = ino - AUTOFS_FIRST_SYMLINK; - if ( n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) { + if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) { printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino); return; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index f2597205939..c1489533277 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -67,8 +67,8 @@ static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldi filp->f_pos = ++nr; /* fall through */ default: - while ( onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent) ) { - if ( !ent->dentry || d_mountpoint(ent->dentry) ) { + while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) { + if (!ent->dentry || d_mountpoint(ent->dentry)) { if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0) goto out; filp->f_pos = nr; @@ -88,10 +88,10 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str struct autofs_dir_ent *ent; int status = 0; - if ( !(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)) ) { + if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) { do { - if ( status && dentry->d_inode ) { - if ( status != -ENOENT ) + if (status && dentry->d_inode) { + if (status != -ENOENT) printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name); return 0; /* Try to get the kernel to invalidate this dentry */ } @@ -106,7 +106,7 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str return 1; } status = autofs_wait(sbi, &dentry->d_name); - } while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)) ); + } while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))); } /* Abuse this field as a pointer to the directory entry, used to @@ 
-124,13 +124,13 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str /* If this is a directory that isn't a mount point, bitch at the daemon and fix it in user space */ - if ( S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) ) { + if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) { return !autofs_wait(sbi, &dentry->d_name); } /* We don't update the usages for the autofs daemon itself, this is necessary for recursive autofs mounts */ - if ( !autofs_oz_mode(sbi) ) { + if (!autofs_oz_mode(sbi)) { autofs_update_usage(&sbi->dirhash,ent); } @@ -157,7 +157,7 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) sbi = autofs_sbi(dir->i_sb); /* Pending dentry */ - if ( dentry->d_flags & DCACHE_AUTOFS_PENDING ) { + if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { if (autofs_oz_mode(sbi)) res = 1; else @@ -173,7 +173,7 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) } /* Check for a non-mountpoint directory */ - if ( S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) ) { + if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) { if (autofs_oz_mode(sbi)) res = 1; else @@ -183,9 +183,9 @@ static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) } /* Update the usage list */ - if ( !autofs_oz_mode(sbi) ) { + if (!autofs_oz_mode(sbi)) { ent = (struct autofs_dir_ent *) dentry->d_time; - if ( ent ) + if (ent) autofs_update_usage(&sbi->dirhash,ent); } unlock_kernel(); @@ -213,8 +213,10 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr sbi = autofs_sbi(dir->i_sb); oz_mode = autofs_oz_mode(sbi); - DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", - current->pid, process_group(current), sbi->catatonic, oz_mode)); + DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, " + "oz_mode = %d\n", pid_nr(task_pid(current)), + process_group(current), sbi->catatonic, + oz_mode)); /* * Mark 
the dentry incomplete, but add it. This is needed so @@ -258,7 +260,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * doesn't do the right thing for all system calls, but it should * be OK for the operations we permit from an autofs. */ - if ( dentry->d_inode && d_unhashed(dentry) ) + if (dentry->d_inode && d_unhashed(dentry)) return ERR_PTR(-ENOENT); return NULL; @@ -277,18 +279,18 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c autofs_say(dentry->d_name.name,dentry->d_name.len); lock_kernel(); - if ( !autofs_oz_mode(sbi) ) { + if (!autofs_oz_mode(sbi)) { unlock_kernel(); return -EACCES; } - if ( autofs_hash_lookup(dh, &dentry->d_name) ) { + if (autofs_hash_lookup(dh, &dentry->d_name)) { unlock_kernel(); return -EEXIST; } n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS); - if ( n >= AUTOFS_MAX_SYMLINKS ) { + if (n >= AUTOFS_MAX_SYMLINKS) { unlock_kernel(); return -ENOSPC; } @@ -297,14 +299,14 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c sl = &sbi->symlink[n]; sl->len = strlen(symname); sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL); - if ( !sl->data ) { + if (!sl->data) { clear_bit(n,sbi->symlink_bitmap); unlock_kernel(); return -ENOSPC; } ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); - if ( !ent ) { + if (!ent) { kfree(sl->data); clear_bit(n,sbi->symlink_bitmap); unlock_kernel(); @@ -312,7 +314,7 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c } ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL); - if ( !ent->name ) { + if (!ent->name) { kfree(sl->data); kfree(ent); clear_bit(n,sbi->symlink_bitmap); @@ -354,23 +356,23 @@ static int autofs_root_unlink(struct inode *dir, struct dentry *dentry) /* This allows root to remove symlinks */ lock_kernel(); - if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) { + if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) { unlock_kernel(); 
return -EACCES; } ent = autofs_hash_lookup(dh, &dentry->d_name); - if ( !ent ) { + if (!ent) { unlock_kernel(); return -ENOENT; } n = ent->ino - AUTOFS_FIRST_SYMLINK; - if ( n >= AUTOFS_MAX_SYMLINKS ) { + if (n >= AUTOFS_MAX_SYMLINKS) { unlock_kernel(); return -EISDIR; /* It's a directory, dummy */ } - if ( !test_bit(n,sbi->symlink_bitmap) ) { + if (!test_bit(n,sbi->symlink_bitmap)) { unlock_kernel(); return -EINVAL; /* Nonexistent symlink? Shouldn't happen */ } @@ -392,23 +394,23 @@ static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry) struct autofs_dir_ent *ent; lock_kernel(); - if ( !autofs_oz_mode(sbi) ) { + if (!autofs_oz_mode(sbi)) { unlock_kernel(); return -EACCES; } ent = autofs_hash_lookup(dh, &dentry->d_name); - if ( !ent ) { + if (!ent) { unlock_kernel(); return -ENOENT; } - if ( (unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO ) { + if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) { unlock_kernel(); return -ENOTDIR; /* Not a directory */ } - if ( ent->dentry != dentry ) { + if (ent->dentry != dentry) { printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name); } @@ -429,18 +431,18 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) ino_t ino; lock_kernel(); - if ( !autofs_oz_mode(sbi) ) { + if (!autofs_oz_mode(sbi)) { unlock_kernel(); return -EACCES; } ent = autofs_hash_lookup(dh, &dentry->d_name); - if ( ent ) { + if (ent) { unlock_kernel(); return -EEXIST; } - if ( sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO ) { + if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) { printk("autofs: Out of inode numbers -- what the heck did you do??\n"); unlock_kernel(); return -ENOSPC; @@ -448,13 +450,13 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) ino = sbi->next_dir_ino++; ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL); - if ( !ent ) { + if (!ent) { unlock_kernel(); return -ENOSPC; } ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL); - if ( !ent->name ) { 
+ if (!ent->name) { kfree(ent); unlock_kernel(); return -ENOSPC; @@ -483,7 +485,7 @@ static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, put_user(sbi->exp_timeout / HZ, p)) return -EFAULT; - if ( ntimeout > ULONG_MAX/HZ ) + if (ntimeout > ULONG_MAX/HZ) sbi->exp_timeout = 0; else sbi->exp_timeout = ntimeout * HZ; @@ -511,15 +513,14 @@ static inline int autofs_expire_run(struct super_block *sb, pkt.hdr.proto_version = AUTOFS_PROTO_VERSION; pkt.hdr.type = autofs_ptype_expire; - if ( !sbi->exp_timeout || - !(ent = autofs_expire(sb,sbi,mnt)) ) + if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt))) return -EAGAIN; pkt.len = ent->len; memcpy(pkt.name, ent->name, pkt.len); pkt.name[pkt.len] = '\0'; - if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) + if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) return -EFAULT; return 0; @@ -537,11 +538,11 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current))); - if ( _IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || - _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) return -ENOTTY; - if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) + if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EPERM; switch(cmd) { diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 26063dc84a2..692364e8ffc 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -18,7 +18,6 @@ #include <linux/pagemap.h> #include <linux/parser.h> #include <linux/bitops.h> -#include <linux/smp_lock.h> #include <linux/magic.h> #include "autofs_i.h" #include <linux/module.h> @@ -219,8 +218,7 @@ static match_table_t tokens = { }; static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, - pid_t *pgrp, unsigned int 
*type, - int *minproto, int *maxproto) + pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -315,7 +313,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) struct autofs_info *ino; sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); - if ( !sbi ) + if (!sbi) goto fail_unlock; DPRINTK("starting up, sbi = %p",sbi); @@ -364,10 +362,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) root->d_fsdata = ino; /* Can this call block? */ - if (parse_options(data, &pipefd, - &root_inode->i_uid, &root_inode->i_gid, - &sbi->oz_pgrp, &sbi->type, - &sbi->min_proto, &sbi->max_proto)) { + if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, + &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, + &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } @@ -397,11 +394,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); pipe = fget(pipefd); - if ( !pipe ) { + if (!pipe) { printk("autofs: could not open pipe file descriptor\n"); goto fail_dput; } - if ( !pipe->f_op || !pipe->f_op->write ) + if (!pipe->f_op || !pipe->f_op->write) goto fail_fput; sbi->pipe = pipe; sbi->pipefd = pipefd; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d0e9b3a3905..2d4c8a3e604 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -17,7 +17,6 @@ #include <linux/stat.h> #include <linux/param.h> #include <linux/time.h> -#include <linux/smp_lock.h> #include "autofs_i.h" static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); @@ -760,7 +759,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) struct autofs_info *p_ino; /* This allows root to remove symlinks */ - if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) + if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; if (atomic_dec_and_test(&ino->count)) { @@ -834,7 +833,7 
@@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct autofs_info *p_ino; struct inode *inode; - if ( !autofs4_oz_mode(sbi) ) + if (!autofs4_oz_mode(sbi)) return -EACCES; DPRINTK("dentry %p, creating %.*s", @@ -872,11 +871,11 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, int rv; unsigned long ntimeout; - if ( (rv = get_user(ntimeout, p)) || - (rv = put_user(sbi->exp_timeout/HZ, p)) ) + if ((rv = get_user(ntimeout, p)) || + (rv = put_user(sbi->exp_timeout/HZ, p))) return rv; - if ( ntimeout > ULONG_MAX/HZ ) + if (ntimeout > ULONG_MAX/HZ) sbi->exp_timeout = 0; else sbi->exp_timeout = ntimeout * HZ; @@ -907,7 +906,7 @@ static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p) DPRINTK("returning %d", sbi->needs_reghost); status = put_user(sbi->needs_reghost, p); - if ( status ) + if (status) return status; sbi->needs_reghost = 0; @@ -976,11 +975,11 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp, DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", cmd,arg,sbi,process_group(current)); - if ( _IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || - _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) return -ENOTTY; - if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) + if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EPERM; switch(cmd) { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index efeab2fab40..329ee473eed 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -12,7 +12,6 @@ #include <linux/module.h> #include <linux/stat.h> #include <linux/time.h> -#include <linux/smp_lock.h> #include <linux/namei.h> #include <linux/poll.h> diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index fe96108a788..a5c5171c282 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -292,10 +292,8 @@ befs_destroy_inode(struct 
inode *inode) static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; - - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&bi->vfs_inode); - } + + inode_init_once(&bi->vfs_inode); } static void diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index edc08d89aab..58c7bd9f530 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,8 +248,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct bfs_inode_info *bi = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&bi->vfs_inode); + inode_init_once(&bi->vfs_inode); } static int init_inodecache(void) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9cc4f0a8aaa..fa8ea33ab0b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,7 +31,6 @@ #include <linux/init.h> #include <linux/highuid.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/compiler.h> #include <linux/highmem.h> #include <linux/pagemap.h> @@ -39,6 +38,7 @@ #include <linux/syscalls.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/utsname.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -871,6 +871,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) elf_prot, elf_flags); if (BAD_ADDR(error)) { send_sig(SIGKILL, current, 0); + retval = IS_ERR((void *)error) ? + PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } @@ -900,6 +902,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. 
*/ send_sig(SIGKILL, current, 0); + retval = -EINVAL; goto out_free_dentry; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f3ddca4a387..9d62fbad3d4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -30,7 +30,6 @@ #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/init.h> -#include <linux/smp_lock.h> #include <linux/elf.h> #include <linux/elf-fdpic.h> #include <linux/elfcore.h> diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 1f2d1ad6331..576dd7de227 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/binfmts.h> #include <linux/elf.h> #include <linux/init.h> diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 7b0265d7f3a..861141b4f6d 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -558,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, if (!realdatastart) realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", - (int)-datapos); + (int)-realdatastart); do_munmap(current->mm, textpos, text_len); ret = realdatastart; goto err; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e6f57990b12..330fd3fe854 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/ctype.h> @@ -675,19 +675,8 @@ static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { char *s = enabled ? 
"enabled" : "disabled"; - int len = strlen(s); - loff_t pos = *ppos; - if (pos < 0) - return -EINVAL; - if (pos >= len) - return 0; - if (len < pos + nbytes) - nbytes = len - pos; - if (copy_to_user(buf, s + pos, nbytes)) - return -EFAULT; - *ppos = pos + nbytes; - return nbytes; + return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } static ssize_t bm_status_write(struct file * file, const char __user * buffer, @@ -727,8 +716,8 @@ static const struct super_operations s_ops = { static int bm_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr bm_files[] = { - [1] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, - [2] = {"register", &bm_register_operations, S_IWUSR}, + [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, + [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} }; int err = simple_fill_super(sb, 0x42494e4d, bm_files); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 1edbcca25a7..304c88544d8 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -12,7 +12,6 @@ #include <linux/binfmts.h> #include <linux/init.h> #include <linux/file.h> -#include <linux/smp_lock.h> #include <linux/err.h> #include <linux/fs.h> diff --git a/fs/block_dev.c b/fs/block_dev.c index f02b7bdd986..ea1480a16f5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -22,6 +22,7 @@ #include <linux/mount.h> #include <linux/uio.h> #include <linux/namei.h> +#include <linux/log2.h> #include <asm/uaccess.h> #include "internal.h" @@ -67,7 +68,7 @@ static void kill_bdev(struct block_device *bdev) int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ - if (size > PAGE_SIZE || size < 512 || (size & (size-1))) + if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ @@ -457,17 +458,15 @@ static void init_once(void * foo, struct kmem_cache * cachep, 
unsigned long flag struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); - INIT_LIST_HEAD(&bdev->bd_inodes); - INIT_LIST_HEAD(&bdev->bd_list); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + sema_init(&bdev->bd_mount_sem, 1); + INIT_LIST_HEAD(&bdev->bd_inodes); + INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_list); + INIT_LIST_HEAD(&bdev->bd_holder_list); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static inline void __bd_forget(struct inode *inode) diff --git a/fs/buffer.c b/fs/buffer.c index 7db24b9e544..aa68206bd51 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -24,7 +24,6 @@ #include <linux/mm.h> #include <linux/percpu.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/capability.h> #include <linux/blkdev.h> #include <linux/file.h> @@ -982,7 +981,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (!page) return NULL; @@ -1727,6 +1727,7 @@ recover: } while ((bh = bh->b_this_page) != head); SetPageError(page); BUG_ON(PageWriteback(page)); + mapping_set_error(page->mapping, err); set_page_writeback(page); do { struct buffer_head *next = bh->b_this_page; @@ -1846,13 +1847,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (block_start >= to) break; if (buffer_new(bh)) { - void *kaddr; - clear_buffer_new(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_start, bh->b_size, KM_USER0); 
set_buffer_uptodate(bh); mark_buffer_dirty(bh); } @@ -1940,10 +1936,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, i * blocksize, blocksize, + KM_USER0); if (!err) set_buffer_uptodate(bh); continue; @@ -2086,7 +2080,6 @@ int cont_prepare_write(struct page *page, unsigned offset, long status; unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; - void *kaddr; while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { status = -ENOMEM; @@ -2108,10 +2101,8 @@ int cont_prepare_write(struct page *page, unsigned offset, PAGE_CACHE_SIZE, get_block); if (status) goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - flush_dcache_page(new_page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom, + KM_USER0); generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); unlock_page(new_page); page_cache_release(new_page); @@ -2138,10 +2129,7 @@ int cont_prepare_write(struct page *page, unsigned offset, if (status) goto out1; if (zerofrom < offset) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0); __block_commit_write(inode, page, zerofrom, offset); } return 0; @@ -2340,10 +2328,7 @@ failed: * Error recovery is pretty slack. Clear the page and mark it dirty * so we'll later zero out any blocks which _were_ allocated. 
*/ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); SetPageUptodate(page); set_page_dirty(page); return ret; @@ -2382,7 +2367,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; int ret; /* Is the page fully inside i_size? */ @@ -2413,10 +2397,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2437,7 +2418,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) unsigned to; struct page *page; const struct address_space_operations *a_ops = mapping->a_ops; - char *kaddr; int ret = 0; if ((offset & (blocksize - 1)) == 0) @@ -2451,10 +2431,8 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) to = (offset + blocksize) & ~(blocksize - 1); ret = a_ops->prepare_write(NULL, page, offset, to); if (ret == 0) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, + KM_USER0); /* * It would be more correct to call aops->commit_write() * here, but this is more efficient. 
@@ -2480,7 +2458,6 @@ int block_truncate_page(struct address_space *mapping, struct inode *inode = mapping->host; struct page *page; struct buffer_head *bh; - void *kaddr; int err; blocksize = 1 << inode->i_blkbits; @@ -2534,11 +2511,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - + zero_user_page(page, offset, length, KM_USER0); mark_buffer_dirty(bh); err = 0; @@ -2559,7 +2532,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; /* Is the page fully inside i_size? */ if (page->index < end_index) @@ -2585,10 +2557,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." 
*/ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); return __block_write_full_page(inode, page, get_block, wbc); } @@ -2930,8 +2899,9 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { + INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; recalc_bh_state(); put_cpu_var(bh_accounting); @@ -2950,17 +2920,6 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void -init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) -{ - if (flags & SLAB_CTOR_CONSTRUCTOR) { - struct buffer_head * bh = (struct buffer_head *)data; - - memset(bh, 0, sizeof(*bh)); - INIT_LIST_HEAD(&bh->b_assoc_buffers); - } -} - static void buffer_exit_cpu(int cpu) { int i; @@ -2978,7 +2937,7 @@ static void buffer_exit_cpu(int cpu) static int buffer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } @@ -2987,12 +2946,8 @@ void __init buffer_init(void) { int nrpages; - bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD), - init_buffer_head, - NULL); + bh_cachep = KMEM_CACHE(buffer_head, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 6017c465440..07838b2ac1c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -7,16 +7,16 @@ * * This program is free software; you can redistribute it and/or modify * 
it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> @@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length) char *charptr = data; char buf[10], line[80]; - printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", + printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", label, length, data); for (i = 0; i < length; i += 16) { line[0] = 0; @@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length) #ifdef CONFIG_CIFS_DEBUG2 void cifs_dump_detail(struct smb_hdr * smb) { - cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", + cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", smb->Command, smb->Status.CifsError, smb->Flags, smb->Flags2, smb->Mid, smb->Pid)); - cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb))); + cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb))); } @@ -72,36 +72,35 @@ void cifs_dump_mids(struct TCP_Server_Info * server) struct list_head *tmp; struct mid_q_entry * mid_entry; - if(server == NULL) + if (server == NULL) return; - cERROR(1,("Dump pending requests:")); + cERROR(1, ("Dump pending requests:")); spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - 
if(mid_entry) { - cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", + if (mid_entry) { + cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", mid_entry->midState, (int)mid_entry->command, mid_entry->pid, mid_entry->tsk, mid_entry->mid)); #ifdef CONFIG_CIFS_STATS2 - cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld", + cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", mid_entry->largeBuf, mid_entry->resp_buf, mid_entry->when_received, jiffies)); #endif /* STATS2 */ - cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp, + cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, mid_entry->multiEnd)); - if(mid_entry->resp_buf) { + if (mid_entry->resp_buf) { cifs_dump_detail(mid_entry->resp_buf); cifs_dump_mem("existing buf: ", mid_entry->resp_buf, 62 /* fixme */); } - } } spin_unlock(&GlobalMid_Lock); @@ -129,9 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, "Display Internal CIFS Data Structures for Debugging\n" "---------------------------------------------------\n"); buf += length; - length = sprintf(buf,"CIFS Version %s\n",CIFS_VERSION); + length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION); buf += length; - length = sprintf(buf,"Active VFS Requests: %d\n", GlobalTotalActiveXid); + length = sprintf(buf, + "Active VFS Requests: %d\n", GlobalTotalActiveXid); buf += length; length = sprintf(buf, "Servers:"); buf += length; @@ -141,7 +141,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, list_for_each(tmp, &GlobalSMBSessionList) { i++; ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if((ses->serverDomain == NULL) || (ses->serverOS == NULL) || + if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { buf += sprintf(buf, "\nentry for %s not fully " "displayed\n\t", ses->serverName); @@ -149,15 +149,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, } else { length = sprintf(buf, - "\n%d) Name: %s Domain: %s Mounts: %d OS: %s 
\n\tNOS: %s\tCapability: 0x%x\n\tSMB session status: %d\t", + "\n%d) Name: %s Domain: %s Mounts: %d OS:" + " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" + " session status: %d\t", i, ses->serverName, ses->serverDomain, atomic_read(&ses->inUse), ses->serverOS, ses->serverNOS, - ses->capabilities,ses->status); + ses->capabilities, ses->status); buf += length; } - if(ses->server) { - buf += sprintf(buf, "TCP status: %d\n\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d", + if (ses->server) { + buf += sprintf(buf, "TCP status: %d\n\tLocal Users To " + "Server: %d SecMode: 0x%x Req On Wire: %d", ses->server->tcpStatus, atomic_read(&ses->server->socketUseCount), ses->server->secMode, @@ -165,7 +168,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, #ifdef CONFIG_CIFS_STATS2 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", - atomic_read(&ses->server->inSend), + atomic_read(&ses->server->inSend), atomic_read(&ses->server->num_waiters)); #endif @@ -177,17 +180,19 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, mid_entry = list_entry(tmp1, struct mid_q_entry, qhead); - if(mid_entry) { - length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p mid %d\n", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid); + if (mid_entry) { + length = sprintf(buf, + "State: %d com: %d pid:" + " %d tsk: %p mid %d\n", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid); buf += length; } } - spin_unlock(&GlobalMid_Lock); + spin_unlock(&GlobalMid_Lock); } } @@ -207,7 +212,8 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); length = sprintf(buf, - "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x Attributes: 0x%x\nPathComponentMax: %d Status: %d", + "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x " + "Attributes: 0x%x\nPathComponentMax: %d Status: %d", i, tcon->treeName, 
atomic_read(&tcon->useCount), tcon->nativeFileSystem, @@ -215,7 +221,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), tcon->tidStatus); - buf += length; + buf += length; if (dev_type == FILE_DEVICE_DISK) length = sprintf(buf, " type: DISK "); else if (dev_type == FILE_DEVICE_CD_ROM) @@ -224,7 +230,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, length = sprintf(buf, " type: %d ", dev_type); buf += length; - if(tcon->tidStatus == CifsNeedReconnect) { + if (tcon->tidStatus == CifsNeedReconnect) { buf += sprintf(buf, "\tDISCONNECTED "); length += 14; } @@ -238,9 +244,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, /* Now calculate total size of returned data */ length = buf - original_buf; - if(offset + count >= length) + if (offset + count >= length) *eof = 1; - if(length < offset) { + if (length < offset) { *eof = 1; return 0; } else { @@ -256,18 +262,18 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, static int cifs_stats_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { - char c; - int rc; + char c; + int rc; struct list_head *tmp; struct cifsTconInfo *tcon; - rc = get_user(c, buffer); - if (rc) - return rc; + rc = get_user(c, buffer); + if (rc) + return rc; - if (c == '1' || c == 'y' || c == 'Y' || c == '0') { + if (c == '1' || c == 'y' || c == 'Y' || c == '0') { read_lock(&GlobalSMBSeslock); #ifdef CONFIG_CIFS_STATS2 atomic_set(&totBufAllocCount, 0); @@ -297,14 +303,14 @@ cifs_stats_write(struct file *file, const char __user *buffer, read_unlock(&GlobalSMBSeslock); } - return count; + return count; } static int cifs_stats_read(char *buf, char **beginBuffer, off_t offset, int count, int *eof, void *data) { - int item_length,i,length; + int item_length, i, length; struct list_head *tmp; struct cifsTconInfo 
*tcon; @@ -314,44 +320,44 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, "Resources in use\nCIFS Session: %d\n", sesInfoAllocCount.counter); buf += length; - item_length = - sprintf(buf,"Share (unique mount targets): %d\n", + item_length = + sprintf(buf, "Share (unique mount targets): %d\n", tconInfoAllocCount.counter); length += item_length; - buf += item_length; - item_length = - sprintf(buf,"SMB Request/Response Buffer: %d Pool size: %d\n", + buf += item_length; + item_length = + sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n", bufAllocCount.counter, cifs_min_rcv + tcpSesAllocCount.counter); length += item_length; buf += item_length; - item_length = - sprintf(buf,"SMB Small Req/Resp Buffer: %d Pool size: %d\n", - smBufAllocCount.counter,cifs_min_small); + item_length = + sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n", + smBufAllocCount.counter, cifs_min_small); length += item_length; buf += item_length; #ifdef CONFIG_CIFS_STATS2 - item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", + item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", atomic_read(&totBufAllocCount), - atomic_read(&totSmBufAllocCount)); + atomic_read(&totSmBufAllocCount)); length += item_length; buf += item_length; #endif /* CONFIG_CIFS_STATS2 */ - item_length = - sprintf(buf,"Operations (MIDs): %d\n", + item_length = + sprintf(buf, "Operations (MIDs): %d\n", midCount.counter); length += item_length; buf += item_length; item_length = sprintf(buf, "\n%d session %d share reconnects\n", - tcpSesReconnectCount.counter,tconInfoReconnectCount.counter); + tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); length += item_length; buf += item_length; item_length = sprintf(buf, "Total vfs operations: %d maximum at one time: %d\n", - GlobalCurrentXid,GlobalMaxActiveXid); + GlobalCurrentXid, GlobalMaxActiveXid); length += item_length; buf += item_length; @@ -360,10 +366,10 @@ cifs_stats_read(char *buf, char 
**beginBuffer, off_t offset, list_for_each(tmp, &GlobalTreeConnectionList) { i++; tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - item_length = sprintf(buf,"\n%d) %s",i, tcon->treeName); + item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName); buf += item_length; length += item_length; - if(tcon->tidStatus == CifsNeedReconnect) { + if (tcon->tidStatus == CifsNeedReconnect) { buf += sprintf(buf, "\tDISCONNECTED "); length += 14; } @@ -380,15 +386,15 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, item_length = sprintf(buf, "\nWrites: %d Bytes: %lld", atomic_read(&tcon->num_writes), (long long)(tcon->bytes_written)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, + buf += item_length; + length += item_length; + item_length = sprintf(buf, "\nLocks: %d HardLinks: %d Symlinks: %d", - atomic_read(&tcon->num_locks), + atomic_read(&tcon->num_locks), atomic_read(&tcon->num_hardlinks), atomic_read(&tcon->num_symlinks)); - buf += item_length; - length += item_length; + buf += item_length; + length += item_length; item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", atomic_read(&tcon->num_opens), @@ -415,12 +421,12 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, } read_unlock(&GlobalSMBSeslock); - buf += sprintf(buf,"\n"); + buf += sprintf(buf, "\n"); length++; - if(offset + count >= length) + if (offset + count >= length) *eof = 1; - if(length < offset) { + if (length < offset) { *eof = 1; return 0; } else { @@ -428,7 +434,7 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset, } if (length > count) length = count; - + return length; } #endif @@ -547,11 +553,11 @@ cifs_proc_clean(void) remove_proc_entry("MultiuserMount", proc_fs_cifs); remove_proc_entry("OplockEnabled", proc_fs_cifs); /* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */ - remove_proc_entry("SecurityFlags",proc_fs_cifs); -/* remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */ - 
remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); - remove_proc_entry("Experimental",proc_fs_cifs); - remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); + remove_proc_entry("SecurityFlags", proc_fs_cifs); +/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */ + remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); + remove_proc_entry("Experimental", proc_fs_cifs); + remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); remove_proc_entry("cifs", proc_root_fs); } @@ -590,7 +596,7 @@ cifsFYI_write(struct file *file, const char __user *buffer, cifsFYI = 0; else if (c == '1' || c == 'y' || c == 'Y') cifsFYI = 1; - else if((c > '1') && (c <= '9')) + else if ((c > '1') && (c <= '9')) cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */ return count; @@ -637,28 +643,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer, static int experimEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - int len; + int len; - len = sprintf(page, "%d\n", experimEnabled); + len = sprintf(page, "%d\n", experimEnabled); - len -= off; - *start = page + off; + len -= off; + *start = page + off; - if (len > count) - len = count; - else - *eof = 1; + if (len > count) + len = count; + else + *eof = 1; - if (len < 0) - len = 0; + if (len < 0) + len = 0; - return len; + return len; } static int experimEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { char c; int rc; @@ -678,46 +684,46 @@ experimEnabled_write(struct file *file, const char __user *buffer, static int linuxExtensionsEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - int len; + int len; - len = sprintf(page, "%d\n", linuxExtEnabled); - len -= off; - *start = page + off; + len = sprintf(page, "%d\n", linuxExtEnabled); + len -= off; + *start = page + off; - if (len 
> count) - len = count; - else - *eof = 1; + if (len > count) + len = count; + else + *eof = 1; - if (len < 0) - len = 0; + if (len < 0) + len = 0; - return len; + return len; } static int linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { - char c; - int rc; - - rc = get_user(c, buffer); - if (rc) - return rc; - if (c == '0' || c == 'n' || c == 'N') - linuxExtEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - linuxExtEnabled = 1; - - return count; + char c; + int rc; + + rc = get_user(c, buffer); + if (rc) + return rc; + if (c == '0' || c == 'n' || c == 'N') + linuxExtEnabled = 0; + else if (c == '1' || c == 'y' || c == 'Y') + linuxExtEnabled = 1; + + return count; } static int lookupFlag_read(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { int len; @@ -860,15 +866,15 @@ security_flags_write(struct file *file, const char __user *buffer, char flags_string[12]; char c; - if((count < 1) || (count > 11)) + if ((count < 1) || (count > 11)) return -EINVAL; memset(flags_string, 0, 12); - if(copy_from_user(flags_string, buffer, count)) + if (copy_from_user(flags_string, buffer, count)) return -EFAULT; - if(count < 3) { + if (count < 3) { /* single char or single char followed by null */ c = flags_string[0]; if (c == '0' || c == 'n' || c == 'N') @@ -881,15 +887,15 @@ security_flags_write(struct file *file, const char __user *buffer, flags = simple_strtoul(flags_string, NULL, 0); - cFYI(1,("sec flags 0x%x", flags)); + cFYI(1, ("sec flags 0x%x", flags)); - if(flags <= 0) { - cERROR(1,("invalid security flags %s",flags_string)); + if (flags <= 0) { + cERROR(1, ("invalid security flags %s", flags_string)); return -EINVAL; } - if(flags & ~CIFSSEC_MASK) { - cERROR(1,("attempt to set unsupported security flags 0x%x", + if (flags & ~CIFSSEC_MASK) { + cERROR(1, ("attempt to set unsupported security flags 0x%x", flags & 
~CIFSSEC_MASK)); return -EINVAL; } diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 793c4b95c16..701e9a9185f 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -6,16 +6,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> @@ -32,7 +32,7 @@ * */ int -cifs_strfromUCS_le(char *to, const __le16 * from, +cifs_strfromUCS_le(char *to, const __le16 * from, int len, const struct nls_table *codepage) { int i; @@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len, { int charlen; int i; - wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ + wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ for (i = 0; len && *from; i++, from += charlen, len -= charlen) { @@ -79,7 +79,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len, /* A question mark */ to[i] = cpu_to_le16(0x003f); charlen = 1; - } else + } else to[i] = cpu_to_le16(wchar_to[i]); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8568e100953..7c04752b76c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -701,10 +701,8 @@ cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) { struct cifsInodeInfo 
*cifsi = inode; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&cifsi->vfs_inode); - INIT_LIST_HEAD(&cifsi->lockList); - } + inode_init_once(&cifsi->vfs_inode); + INIT_LIST_HEAD(&cifsi->lockList); } static int @@ -827,8 +825,8 @@ cifs_init_mids(void) sizeof (struct oplock_q_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (cifs_oplock_cachep == NULL) { - kmem_cache_destroy(cifs_mid_cachep); mempool_destroy(cifs_mid_poolp); + kmem_cache_destroy(cifs_mid_cachep); return -ENOMEM; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 14de58fa143..57419a17668 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -433,8 +433,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) cFYI(1,("secFlags 0x%x",secFlags)); pSMB->hdr.Mid = GetNextMid(server); - pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; - if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) + pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); + if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; count = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 216fb625843..f4e92661b22 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2069,8 +2069,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, srvTcp->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); if (srvTcp->tsk) { + struct task_struct *tsk; + /* If we could verify that kthread_stop would + always wake up processes blocked in + tcp in recv_mesg then we could remove the + send_sig call */ send_sig(SIGKILL,srvTcp->tsk,1); - kthread_stop(srvTcp->tsk); + tsk = srvTcp->tsk; + if(tsk) + kthread_stop(tsk); } } /* If find_unc succeeded then rc == 0 so we can not end */ @@ -2085,8 +2092,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* if the socketUseCount is now zero */ if ((temp_rc == -ESHUTDOWN) && (pSesInfo->server) && (pSesInfo->server->tsk)) { + struct task_struct *tsk; send_sig(SIGKILL,pSesInfo->server->tsk,1); - 
kthread_stop(pSesInfo->server->tsk); + tsk = pSesInfo->server->tsk; + if (tsk) + kthread_stop(tsk); } } else cFYI(1, ("No session or bad tcon")); @@ -3334,7 +3344,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) return 0; } else if (rc == -ESHUTDOWN) { cFYI(1,("Waking up socket by sending it signal")); - if(cifsd_task) { + if (cifsd_task) { send_sig(SIGKILL,cifsd_task,1); kthread_stop(cifsd_task); } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e5210519ac4..8e86aaceb68 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -2,7 +2,7 @@ * fs/cifs/dir.c * * vfs operations that deal with dentries - * + * * Copyright (C) International Business Machines Corp., 2002,2005 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -34,11 +34,12 @@ static void renew_parental_timestamps(struct dentry *direntry) { - /* BB check if there is a way to get the kernel to do this or if we really need this */ + /* BB check if there is a way to get the kernel to do this or if we + really need this */ do { direntry->d_time = jiffies; direntry = direntry->d_parent; - } while (!IS_ROOT(direntry)); + } while (!IS_ROOT(direntry)); } /* Note: caller must free return buffer */ @@ -51,7 +52,7 @@ build_path_from_dentry(struct dentry *direntry) char *full_path; char dirsep; - if(direntry == NULL) + if (direntry == NULL) return NULL; /* not much we can do if dentry is freed and we need to reopen the file after it was closed implicitly when the server crashed */ @@ -59,18 +60,18 @@ build_path_from_dentry(struct dentry *direntry) dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); pplen = CIFS_SB(direntry->d_sb)->prepathlen; cifs_bp_rename_retry: - namelen = pplen; + namelen = pplen; for (temp = direntry; !IS_ROOT(temp);) { namelen += (1 + temp->d_name.len); temp = temp->d_parent; - if(temp == NULL) { - cERROR(1,("corrupt dentry")); + if (temp == NULL) { + cERROR(1, ("corrupt dentry")); return NULL; } } full_path = kmalloc(namelen+1, GFP_KERNEL); - if(full_path == NULL) + if (full_path 
== NULL) return full_path; full_path[namelen] = 0; /* trailing null */ for (temp = direntry; !IS_ROOT(temp);) { @@ -84,8 +85,8 @@ cifs_bp_rename_retry: cFYI(0, ("name: %s", full_path + namelen)); } temp = temp->d_parent; - if(temp == NULL) { - cERROR(1,("corrupt dentry")); + if (temp == NULL) { + cERROR(1, ("corrupt dentry")); kfree(full_path); return NULL; } @@ -94,7 +95,7 @@ cifs_bp_rename_retry: cERROR(1, ("did not end path lookup where expected namelen is %d", namelen)); - /* presumably this is only possible if racing with a rename + /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ kfree(full_path); @@ -106,7 +107,7 @@ cifs_bp_rename_retry: since the '\' is a valid posix character so we can not switch those safely to '/' if any are found in the middle of the prepath */ /* BB test paths to Windows with '/' in the midst of prepath */ - strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen); + strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen); return full_path; } @@ -147,12 +148,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } - if(nd && (nd->flags & LOOKUP_OPEN)) { + if (nd && (nd->flags & LOOKUP_OPEN)) { int oflags = nd->intent.open.flags; desiredAccess = 0; @@ -164,28 +165,29 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, write_only = TRUE; } - if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; - else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) disposition = FILE_OVERWRITE_IF; - else if((oflags & O_CREAT) == O_CREAT) + else if ((oflags & O_CREAT) 
== O_CREAT) disposition = FILE_OPEN_IF; else { - cFYI(1,("Create flag not set in create function")); + cFYI(1, ("Create flag not set in create function")); } } - /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */ + /* BB add processing to set equivalent of mode - e.g. via CreateX with + ACLs */ if (oplockEnabled) oplock = REQ_OPLOCK; - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); - if(buf == NULL) { + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (buf == NULL) { kfree(full_path); FreeXid(xid); return -ENOMEM; } - if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) + if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &fileHandle, &oplock, buf, cifs_sb->local_nls, @@ -193,27 +195,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, else rc = -EIO; /* no NT SMB support fall into legacy open below */ - if(rc == -EIO) { + if (rc == -EIO) { /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &fileHandle, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } + } if (rc) { cFYI(1, ("cifs_create returned 0x%x", rc)); } else { /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (oplock & CIFS_CREATE_ACTION)) - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + (oplock & CIFS_CREATE_ACTION)) { + mode &= ~current->fs->umask; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)current->fsuid, (__u64)current->fsgid, 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -221,26 +224,28 @@ cifs_create(struct inode 
*inode, struct dentry *direntry, int mode, (__u64)-1, 0 /* dev */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - else { - /* BB implement mode setting via Windows security descriptors */ - /* eg CIFSSMBWinSetPerms(xid,pTcon,full_path,mode,-1,-1,local_nls);*/ - /* could set r/o dos attribute if mode & 0222 == 0 */ + } else { + /* BB implement mode setting via Windows security + descriptors e.g. */ + /* CIFSSMBWinSetPerms(xid,pTcon,path,mode,-1,-1,nls);*/ + + /* Could set r/o dos attribute if mode & 0222 == 0 */ } /* BB server might mask mode so we have to query for Unix case*/ if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); else { rc = cifs_get_inode_info(&newinode, full_path, - buf, inode->i_sb,xid); - if(newinode) { + buf, inode->i_sb, xid); + if (newinode) { newinode->i_mode = mode; - if((oplock & CIFS_CREATE_ACTION) && - (cifs_sb->mnt_cifs_flags & + if ((oplock & CIFS_CREATE_ACTION) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current->fsuid; newinode->i_gid = current->fsgid; @@ -259,14 +264,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, direntry->d_op = &cifs_dentry_ops; d_instantiate(direntry, newinode); } - if((nd->flags & LOOKUP_OPEN) == FALSE) { + if ((nd->flags & LOOKUP_OPEN) == FALSE) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, pTcon, fileHandle); - } else if(newinode) { + } else if (newinode) { pCifsFile = kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); - - if(pCifsFile == NULL) + + if (pCifsFile == NULL) goto cifs_create_out; pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; @@ -276,33 +281,33 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, init_MUTEX(&pCifsFile->fh_sem); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); - atomic_set(&pCifsFile->wrtPending,0); + 
atomic_set(&pCifsFile->wrtPending, 0); - /* set the following in open now + /* set the following in open now pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist,&pTcon->openFileList); + list_add(&pCifsFile->tlist, &pTcon->openFileList); pCifsInode = CIFS_I(newinode); - if(pCifsInode) { + if (pCifsInode) { /* if readable file instance put first in list*/ if (write_only == TRUE) { - list_add_tail(&pCifsFile->flist, + list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); } else { list_add(&pCifsFile->flist, &pCifsInode->openFileList); } - if((oplock & 0xF) == OPLOCK_EXCLUSIVE) { + if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = TRUE; pCifsInode->clientCanCacheRead = TRUE; - cFYI(1,("Exclusive Oplock for inode %p", + cFYI(1, ("Exclusive Oplock inode %p", newinode)); - } else if((oplock & 0xF) == OPLOCK_READ) + } else if ((oplock & 0xF) == OPLOCK_READ) pCifsInode->clientCanCacheRead = TRUE; } write_unlock(&GlobalSMBSeslock); } - } + } cifs_create_out: kfree(buf); kfree(full_path); @@ -310,8 +315,8 @@ cifs_create_out: return rc; } -int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, - dev_t device_number) +int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, + dev_t device_number) { int rc = -EPERM; int xid; @@ -329,43 +334,45 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) + if (full_path == NULL) rc = -ENOMEM; else if (pTcon->ses->capabilities & CAP_UNIX) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + mode &= ~current->fs->umask; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode,(__u64)current->fsuid,(__u64)current->fsgid, + mode, (__u64)current->fsuid, + (__u64)current->fsgid, device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & 
CIFS_MOUNT_MAP_SPECIAL_CHR); } else { rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)-1, (__u64)-1, device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - if(!rc) { + if (!rc) { rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); if (pTcon->nocase) direntry->d_op = &cifs_ci_dentry_ops; else direntry->d_op = &cifs_dentry_ops; - if(rc == 0) + if (rc == 0) d_instantiate(direntry, newinode); } } else { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { int oplock = 0; u16 fileHandle; FILE_ALL_INFO * buf; - cFYI(1,("sfu compat create special file")); + cFYI(1, ("sfu compat create special file")); - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); - if(buf == NULL) { + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (buf == NULL) { kfree(full_path); FreeXid(xid); return -ENOMEM; @@ -373,39 +380,38 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, /* fail if exists */ - GENERIC_WRITE /* BB would + GENERIC_WRITE /* BB would WRITE_OWNER | WRITE_DAC be better? 
*/, /* Create a file and set the file attribute to SYSTEM */ CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, &fileHandle, &oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* BB FIXME - add handling for backlevel servers which need legacy open and check for all - calls to SMBOpen for fallback to - SMBLeagcyOpen */ - if(!rc) { + calls to SMBOpen for fallback to SMBLeagcyOpen */ + if (!rc) { /* BB Do not bother to decode buf since no local inode yet to put timestamps in, but we can reuse it safely */ int bytes_written; struct win_dev *pdev; pdev = (struct win_dev *)buf; - if(S_ISCHR(mode)) { + if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); pdev->major = cpu_to_le64(MAJOR(device_number)); - pdev->minor = + pdev->minor = cpu_to_le64(MINOR(device_number)); rc = CIFSSMBWrite(xid, pTcon, fileHandle, sizeof(struct win_dev), 0, &bytes_written, (char *)pdev, NULL, 0); - } else if(S_ISBLK(mode)) { + } else if (S_ISBLK(mode)) { memcpy(pdev->type, "IntxBLK", 8); pdev->major = cpu_to_le64(MAJOR(device_number)); @@ -432,7 +438,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, struct dentry * -cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd) +cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, + struct nameidata *nd) { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ @@ -447,8 +454,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name (" parent inode = 0x%p name is: %s and dentry = 0x%p", parent_dir_inode, direntry->d_name.name, direntry)); - /* BB Add check of incoming data - e.g. 
frame not longer than maximum SMB - let server check the namelen BB */ - /* check whether path exists */ cifs_sb = CIFS_SB(parent_dir_inode->i_sb); @@ -472,7 +477,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return ERR_PTR(-ENOMEM); } @@ -487,10 +492,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&newInode, full_path, - parent_dir_inode->i_sb,xid); + parent_dir_inode->i_sb, xid); else rc = cifs_get_inode_info(&newInode, full_path, NULL, - parent_dir_inode->i_sb,xid); + parent_dir_inode->i_sb, xid); if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) @@ -499,7 +504,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name direntry->d_op = &cifs_dentry_ops; d_add(direntry, newInode); - /* since paths are not looked up by component - the parent + /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); @@ -511,13 +516,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name else direntry->d_op = &cifs_dentry_ops; d_add(direntry, NULL); - /* if it was once a directory (but how can we tell?) we could do - shrink_dcache_parent(direntry); */ + /* if it was once a directory (but how can we tell?) 
we could do + shrink_dcache_parent(direntry); */ } else { - cERROR(1,("Error 0x%x on cifs_get_inode_info in lookup of %s", - rc,full_path)); - /* BB special case check for Access Denied - watch security - exposure of returning dir info implicitly via different rc + cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s", + rc, full_path)); + /* BB special case check for Access Denied - watch security + exposure of returning dir info implicitly via different rc if file exists or not but no access BB */ } @@ -538,11 +543,11 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) } else { cFYI(1, ("neg dentry 0x%p name = %s", direntry, direntry->d_name.name)); - if(time_after(jiffies, direntry->d_time + HZ) || + if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) { d_drop(direntry); isValid = 0; - } + } } return isValid; @@ -559,8 +564,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) struct dentry_operations cifs_dentry_ops = { .d_revalidate = cifs_d_revalidate, -/* d_delete: cifs_d_delete, *//* not needed except for debugging */ - /* no need for d_hash, d_compare, d_release, d_iput ... yet. 
BB confirm this BB */ +/* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index da12b482ebe..8e375bb4b37 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -2,7 +2,7 @@ * fs/cifs/fcntl.c * * vfs operations that deal with the file control API - * + * * Copyright (C) International Business Machines Corp., 2003,2004 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -35,35 +35,34 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) /* No way on Linux VFS to ask to monitor xattr changes (and no stream support either */ - if(fcntl_notify_flags & DN_ACCESS) { + if (fcntl_notify_flags & DN_ACCESS) { cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; } - if(fcntl_notify_flags & DN_MODIFY) { + if (fcntl_notify_flags & DN_MODIFY) { /* What does this mean on directories? */ cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_SIZE; } - if(fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | + if (fcntl_notify_flags & DN_CREATE) { + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | FILE_NOTIFY_CHANGE_LAST_WRITE; } - if(fcntl_notify_flags & DN_DELETE) { + if (fcntl_notify_flags & DN_DELETE) { cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; } - if(fcntl_notify_flags & DN_RENAME) { + if (fcntl_notify_flags & DN_RENAME) { /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | FILE_NOTIFY_CHANGE_FILE_NAME; } - if(fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | + if (fcntl_notify_flags & DN_ATTRIB) { + cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | FILE_NOTIFY_CHANGE_ATTRIBUTES; } -/* if(fcntl_notify_flags & DN_MULTISHOT) { +/* if (fcntl_notify_flags & DN_MULTISHOT) { cifs_ntfy_flags |= ; } */ /* BB fixme - not sure how to handle this 
with CIFS yet */ - return cifs_ntfy_flags; } @@ -78,8 +77,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg) __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; __u16 netfid; - - if(experimEnabled == 0) + if (experimEnabled == 0) return 0; xid = GetXid(); @@ -88,21 +86,21 @@ int cifs_dir_notify(struct file * file, unsigned long arg) full_path = build_path_from_dentry(file->f_path.dentry); - if(full_path == NULL) { + if (full_path == NULL) { rc = -ENOMEM; } else { - cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, + cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); + rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock,NULL, cifs_sb->local_nls, + &netfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* BB fixme - add this handle to a notify handle list */ - if(rc) { - cFYI(1,("Could not open directory for notify")); + if (rc) { + cFYI(1, ("Could not open directory for notify")); } else { filter = convert_to_cifs_notify_flags(arg); - if(filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, + if (filter != 0) { + rc = CIFSSMBNotify(xid, pTcon, 0 /* no subdirs */, netfid, filter, file, arg & DN_MULTISHOT, cifs_sb->local_nls); @@ -113,10 +111,10 @@ int cifs_dir_notify(struct file * file, unsigned long arg) it would close automatically but may be a way to do it easily when inode freed or when notify info is cleared/changed */ - cFYI(1,("notify rc %d",rc)); + cFYI(1, ("notify rc %d", rc)); } } - + FreeXid(xid); return rc; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index b570530f97b..94d5b49049d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -27,7 +27,6 @@ #include <linux/fcntl.h> #include <linux/pagemap.h> #include <linux/pagevec.h> -#include <linux/smp_lock.h> #include <linux/writeback.h> #include <linux/task_io_accounting_ops.h> #include 
<linux/delay.h> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3e87dad3367..f0ff12b3f39 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -986,7 +986,8 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) + if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -1004,7 +1005,7 @@ mkdir_get_info: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } - else { + } else { /* BB to be implemented via Windows secrty descriptors eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, -1, -1, local_nls); */ diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index e34c7db00f6..a414f1775ae 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -30,7 +30,7 @@ #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) -int cifs_ioctl (struct inode * inode, struct file * filep, +int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg) { int rc = -ENOTTY; /* strange error - but the precedent */ @@ -47,13 +47,13 @@ int cifs_ioctl (struct inode * inode, struct file * filep, xid = GetXid(); - cFYI(1,("ioctl file %p cmd %u arg %lu",filep,command,arg)); + cFYI(1, ("ioctl file %p cmd %u arg %lu", filep, command, arg)); cifs_sb = CIFS_SB(inode->i_sb); #ifdef CONFIG_CIFS_POSIX tcon = cifs_sb->tcon; - if(tcon) + if (tcon) caps = le64_to_cpu(tcon->fsUnixInfo.Capability); else { rc = -EIO; @@ -62,24 +62,24 @@ int cifs_ioctl (struct inode * inode, struct file * filep, } #endif /* CONFIG_CIFS_POSIX */ - switch(command) { + switch (command) { case CIFS_IOC_CHECKUMOUNT: - cFYI(1,("User unmount attempted")); - if(cifs_sb->mnt_uid == current->uid) + cFYI(1, ("User unmount attempted")); + if (cifs_sb->mnt_uid == current->uid) rc = 0; else { rc = -EACCES; - cFYI(1,("uids do not 
match")); + cFYI(1, ("uids do not match")); } break; #ifdef CONFIG_CIFS_POSIX case FS_IOC_GETFLAGS: - if(CIFS_UNIX_EXTATTR_CAP & caps) { + if (CIFS_UNIX_EXTATTR_CAP & caps) { if (pSMBFile == NULL) break; rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, &ExtAttrBits, &ExtAttrMask); - if(rc == 0) + if (rc == 0) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); @@ -87,8 +87,8 @@ int cifs_ioctl (struct inode * inode, struct file * filep, break; case FS_IOC_SETFLAGS: - if(CIFS_UNIX_EXTATTR_CAP & caps) { - if(get_user(ExtAttrBits,(int __user *)arg)) { + if (CIFS_UNIX_EXTATTR_CAP & caps) { + if (get_user(ExtAttrBits, (int __user *)arg)) { rc = -EFAULT; break; } @@ -96,16 +96,15 @@ int cifs_ioctl (struct inode * inode, struct file * filep, break; /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, extAttrBits, &ExtAttrMask);*/ - } - cFYI(1,("set flags not implemented yet")); + cFYI(1, ("set flags not implemented yet")); break; #endif /* CONFIG_CIFS_POSIX */ default: - cFYI(1,("unsupported ioctl")); + cFYI(1, ("unsupported ioctl")); break; } FreeXid(xid); return rc; -} +} diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b5364f90d55..c08bda9fcac 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -23,7 +23,6 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/stat.h> -#include <linux/smp_lock.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h index aede606132a..8b69fcceb59 100644 --- a/fs/cifs/rfc1002pdu.h +++ b/fs/cifs/rfc1002pdu.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */ diff --git a/fs/coda/cache.c b/fs/coda/cache.c 
index 5d052713326..fcb88fa8d2f 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -16,6 +16,7 @@ #include <asm/uaccess.h> #include <linux/string.h> #include <linux/list.h> +#include <linux/sched.h> #include <linux/coda.h> #include <linux/coda_linux.h> diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 0aaff3651d1..dbff1bd4fb9 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -62,8 +62,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct coda_inode_info *ei = (struct coda_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } int coda_init_inodecache(void) diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index a5b5e631ba6..5faacdb1a47 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -16,7 +16,7 @@ #include <asm/system.h> #include <linux/signal.h> - +#include <linux/sched.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> diff --git a/fs/compat.c b/fs/compat.c index 72e5e692382..4db6216e526 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -15,6 +15,7 @@ * published by the Free Software Foundation. 
*/ +#include <linux/kernel.h> #include <linux/linkage.h> #include <linux/compat.h> #include <linux/errno.h> @@ -24,10 +25,8 @@ #include <linux/namei.h> #include <linux/file.h> #include <linux/vfs.h> -#include <linux/ioctl32.h> #include <linux/ioctl.h> #include <linux/init.h> -#include <linux/sockios.h> /* for SIOCDEVPRIVATE */ #include <linux/smb.h> #include <linux/smb_mount.h> #include <linux/ncp_mount.h> @@ -45,13 +44,13 @@ #include <linux/personality.h> #include <linux/rwsem.h> #include <linux/tsacct_kern.h> +#include <linux/security.h> #include <linux/highmem.h> +#include <linux/signal.h> #include <linux/poll.h> #include <linux/mm.h> #include <linux/eventpoll.h> -#include <net/sock.h> /* siocdevprivate_ioctl */ - #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/ioctls.h> @@ -79,30 +78,57 @@ int compat_printk(const char *fmt, ...) */ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __user *t) { - struct timeval tv[2]; + struct timespec tv[2]; if (t) { if (get_user(tv[0].tv_sec, &t->actime) || get_user(tv[1].tv_sec, &t->modtime)) return -EFAULT; - tv[0].tv_usec = 0; - tv[1].tv_usec = 0; + tv[0].tv_nsec = 0; + tv[1].tv_nsec = 0; + } + return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); +} + +asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags) +{ + struct timespec tv[2]; + + if (t) { + if (get_compat_timespec(&tv[0], &t[0]) || + get_compat_timespec(&tv[1], &t[1])) + return -EFAULT; + + if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW) + && tv[0].tv_sec != 0) + return -EINVAL; + if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW) + && tv[1].tv_sec != 0) + return -EINVAL; + + if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) + return 0; } - return do_utimes(AT_FDCWD, filename, t ? tv : NULL); + return do_utimes(dfd, filename, t ? 
tv : NULL, flags); } asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t) { - struct timeval tv[2]; + struct timespec tv[2]; if (t) { if (get_user(tv[0].tv_sec, &t[0].tv_sec) || - get_user(tv[0].tv_usec, &t[0].tv_usec) || + get_user(tv[0].tv_nsec, &t[0].tv_usec) || get_user(tv[1].tv_sec, &t[1].tv_sec) || - get_user(tv[1].tv_usec, &t[1].tv_usec)) + get_user(tv[1].tv_nsec, &t[1].tv_usec)) return -EFAULT; + if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || + tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) + return -EINVAL; + tv[0].tv_nsec *= 1000; + tv[1].tv_nsec *= 1000; } - return do_utimes(dfd, filename, t ? tv : NULL); + return do_utimes(dfd, filename, t ? tv : NULL, 0); } asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) @@ -312,163 +338,6 @@ out: return error; } -/* ioctl32 stuff, used by sparc64, parisc, s390x, ppc64, x86_64, MIPS */ - -#define IOCTL_HASHSIZE 256 -static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; - -static inline unsigned long ioctl32_hash(unsigned long cmd) -{ - return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; -} - -static void ioctl32_insert_translation(struct ioctl_trans *trans) -{ - unsigned long hash; - struct ioctl_trans *t; - - hash = ioctl32_hash (trans->cmd); - if (!ioctl32_hash_table[hash]) - ioctl32_hash_table[hash] = trans; - else { - t = ioctl32_hash_table[hash]; - while (t->next) - t = t->next; - trans->next = NULL; - t->next = trans; - } -} - -static int __init init_sys32_ioctl(void) -{ - int i; - - for (i = 0; i < ioctl_table_size; i++) { - if (ioctl_start[i].next != 0) { - printk("ioctl translation %d bad\n",i); - return -1; - } - - ioctl32_insert_translation(&ioctl_start[i]); - } - return 0; -} - -__initcall(init_sys32_ioctl); - -static void compat_ioctl_error(struct file *filp, unsigned int fd, - unsigned int cmd, unsigned long arg) -{ - char buf[10]; - char *fn = "?"; - char *path; - - /* find the name 
of the device. */ - path = (char *)__get_free_page(GFP_KERNEL); - if (path) { - fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE); - if (IS_ERR(fn)) - fn = "?"; - } - - sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK); - if (!isprint(buf[1])) - sprintf(buf, "%02x", buf[1]); - compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) " - "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n", - current->comm, current->pid, - (int)fd, (unsigned int)cmd, buf, - (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK, - (unsigned int)arg, fn); - - if (path) - free_page((unsigned long)path); -} - -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg) -{ - struct file *filp; - int error = -EBADF; - struct ioctl_trans *t; - int fput_needed; - - filp = fget_light(fd, &fput_needed); - if (!filp) - goto out; - - /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(filp, cmd, arg); - if (error) - goto out_fput; - - /* - * To allow the compat_ioctl handlers to be self contained - * we need to check the common ioctls here first. - * Just handle them with the standard handlers below. 
- */ - switch (cmd) { - case FIOCLEX: - case FIONCLEX: - case FIONBIO: - case FIOASYNC: - case FIOQSIZE: - break; - - case FIBMAP: - case FIGETBSZ: - case FIONREAD: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - break; - /*FALL THROUGH*/ - - default: - if (filp->f_op && filp->f_op->compat_ioctl) { - error = filp->f_op->compat_ioctl(filp, cmd, arg); - if (error != -ENOIOCTLCMD) - goto out_fput; - } - - if (!filp->f_op || - (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl)) - goto do_ioctl; - break; - } - - for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) { - if (t->cmd == cmd) - goto found_handler; - } - - if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) && - cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - error = siocdevprivate_ioctl(fd, cmd, arg); - } else { - static int count; - - if (++count <= 50) - compat_ioctl_error(filp, fd, cmd, arg); - error = -EINVAL; - } - - goto out_fput; - - found_handler: - if (t->handler) { - lock_kernel(); - error = t->handler(fd, cmd, arg, filp); - unlock_kernel(); - goto out_fput; - } - - do_ioctl: - error = vfs_ioctl(filp, fd, cmd, arg); - out_fput: - fput_light(filp, fput_needed); - out: - return error; -} - static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) { if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || @@ -902,8 +771,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, } #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) -#define COMPAT_ROUND_UP(x) (((x)+sizeof(compat_long_t)-1) & \ - ~(sizeof(compat_long_t)-1)) struct compat_old_linux_dirent { compat_ulong_t d_ino; @@ -991,7 +858,7 @@ static int compat_filldir(void *__buf, const char *name, int namlen, struct compat_linux_dirent __user * dirent; struct compat_getdents_callback *buf = __buf; compat_ulong_t d_ino; - int reclen = COMPAT_ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); + int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t)); 
buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) @@ -1066,7 +933,6 @@ out: } #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 -#define COMPAT_ROUND_UP64(x) (((x)+sizeof(u64)-1) & ~(sizeof(u64)-1)) struct compat_getdents_callback64 { struct linux_dirent64 __user *current_dir; @@ -1081,7 +947,7 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t struct linux_dirent64 __user *dirent; struct compat_getdents_callback64 *buf = __buf; int jj = NAME_OFFSET(dirent); - int reclen = COMPAT_ROUND_UP64(jj + namlen + 1); + int reclen = ALIGN(jj + namlen + 1, sizeof(u64)); u64 off; buf->error = -EINVAL; /* only used if we fail.. */ @@ -1594,8 +1460,6 @@ out_ret: #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) -#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) - /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to * 64-bit unsigned longs. @@ -1604,7 +1468,7 @@ static int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, unsigned long *fdset) { - nr = ROUND_UP(nr, __COMPAT_NFDBITS); + nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); if (ufdset) { unsigned long odd; @@ -1638,7 +1502,7 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, unsigned long *fdset) { unsigned long odd; - nr = ROUND_UP(nr, __COMPAT_NFDBITS); + nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); if (!ufdset) return 0; @@ -1680,9 +1544,10 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) { fd_set_bits fds; - char *bits; + void *bits; int size, max_fds, ret = -EINVAL; struct fdtable *fdt; + long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; if (n < 0) goto out_nofds; @@ -1700,11 +1565,14 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, * since we used fdset we need to allocate memory in units of * long-words. 
*/ - ret = -ENOMEM; size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; + bits = stack_fds; + if (size > sizeof(stack_fds) / 6) { + bits = kmalloc(6 * size, GFP_KERNEL); + ret = -ENOMEM; + if (!bits) + goto out_nofds; + } fds.in = (unsigned long *) bits; fds.out = (unsigned long *) (bits + size); fds.ex = (unsigned long *) (bits + 2*size); @@ -1736,7 +1604,8 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_set_fd_set(n, exp, fds.res_ex)) ret = -EFAULT; out: - kfree(bits); + if (bits != stack_fds) + kfree(bits); out_nofds: return ret; } @@ -1760,7 +1629,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) timeout = -1; /* infinite */ else { - timeout = ROUND_UP(tv.tv_usec, 1000000/HZ); + timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ); timeout += tv.tv_sec * HZ; } } @@ -1828,7 +1697,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, do { if (tsp) { if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { - timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ); + timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); timeout += ts.tv_sec * (unsigned long)HZ; ts.tv_sec = 0; ts.tv_nsec = 0; @@ -1924,7 +1793,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, /* We assume that ts.tv_sec is always lower than the number of seconds that can be expressed in an s64. 
Otherwise the compiler bitches at us */ - timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ); + timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); timeout += ts.tv_sec * HZ; } @@ -2336,3 +2205,46 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, #endif /* TIF_RESTORE_SIGMASK */ #endif /* CONFIG_EPOLL */ + +#ifdef CONFIG_SIGNALFD + +asmlinkage long compat_sys_signalfd(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) +{ + compat_sigset_t ss32; + sigset_t tmp; + sigset_t __user *ksigmask; + + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&tmp, &ss32); + ksigmask = compat_alloc_user_space(sizeof(sigset_t)); + if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) + return -EFAULT; + + return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); +} + +#endif /* CONFIG_SIGNALFD */ + +#ifdef CONFIG_TIMERFD + +asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, + const struct compat_itimerspec __user *utmr) +{ + struct itimerspec t; + struct itimerspec __user *ut; + + if (get_compat_itimerspec(&t, utmr)) + return -EFAULT; + ut = compat_alloc_user_space(sizeof(*ut)); + if (copy_to_user(ut, &t, sizeof(t))) + return -EFAULT; + + return sys_timerfd(ufd, clockid, flags, ut); +} + +#endif /* CONFIG_TIMERFD */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 464c04a9541..6b44cdc96fa 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -17,7 +17,6 @@ #include <linux/compiler.h> #include <linux/sched.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/ioctl.h> #include <linux/if.h> #include <linux/if_bridge.h> @@ -58,7 +57,6 @@ #include <linux/serial.h> #include <linux/if_tun.h> #include <linux/ctype.h> -#include <linux/ioctl32.h> #include <linux/syscalls.h> #include <linux/i2c.h> #include <linux/i2c-dev.h> @@ -66,7 +64,6 @@ #include <linux/atalk.h> #include <linux/blktrace_api.h> -#include <net/sock.h> /* 
siocdevprivate_ioctl */ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/rfcomm.h> @@ -475,7 +472,7 @@ static int bond_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) }; } -int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +static int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { struct ifreq __user *u_ifreq64; struct ifreq32 __user *u_ifreq32 = compat_ptr(arg); @@ -687,8 +684,10 @@ static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg) if (!err) { err = copy_to_user (ugeo, &geo, 4); err |= __put_user (geo.start, &ugeo->start); + if (err) + err = -EFAULT; } - return err ? -EFAULT : 0; + return err; } static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) @@ -1195,6 +1194,7 @@ static int vt_check(struct file *file) { struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct vc_data *vc; if (file->f_op->ioctl != tty_ioctl) return -EINVAL; @@ -1205,12 +1205,16 @@ static int vt_check(struct file *file) if (tty->driver->ioctl != vt_ioctl) return -EINVAL; - + + vc = (struct vc_data *)tty->driver_data; + if (!vc_cons_allocated(vc->vc_num)) /* impossible? */ + return -ENOIOCTLCMD; + /* * To have permissions to do most of the vt ioctls, we either have - * to be the owner of the tty, or super-user. + * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG. 
*/ - if (current->signal->tty == tty || capable(CAP_SYS_ADMIN)) + if (current->signal->tty == tty || capable(CAP_SYS_TTY_CONFIG)) return 1; return 0; } @@ -1311,16 +1315,28 @@ static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct unimapdesc32 tmp; struct unimapdesc32 __user *user_ud = compat_ptr(arg); int perm = vt_check(file); - - if (perm < 0) return perm; + struct vc_data *vc; + + if (perm < 0) + return perm; if (copy_from_user(&tmp, user_ud, sizeof tmp)) return -EFAULT; + if (tmp.entries) + if (!access_ok(VERIFY_WRITE, compat_ptr(tmp.entries), + tmp.entry_ct*sizeof(struct unipair))) + return -EFAULT; + vc = ((struct tty_struct *)file->private_data)->driver_data; switch (cmd) { case PIO_UNIMAP: - if (!perm) return -EPERM; - return con_set_unimap(vc_cons[fg_console].d, tmp.entry_ct, compat_ptr(tmp.entries)); + if (!perm) + return -EPERM; + return con_set_unimap(vc, tmp.entry_ct, + compat_ptr(tmp.entries)); case GIO_UNIMAP: - return con_get_unimap(vc_cons[fg_console].d, tmp.entry_ct, &(user_ud->entry_ct), compat_ptr(tmp.entries)); + if (!perm && fg_console != vc->vc_num) + return -EPERM; + return con_get_unimap(vc, tmp.entry_ct, &(user_ud->entry_ct), + compat_ptr(tmp.entries)); } return 0; } @@ -2385,6 +2401,16 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)tn); } + +typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, + unsigned long, struct file *); + +struct ioctl_trans { + unsigned long cmd; + ioctl_trans_handler_t handler; + struct ioctl_trans *next; +}; + #define HANDLE_IOCTL(cmd,handler) \ { (cmd), (ioctl_trans_handler_t)(handler) }, @@ -2405,8 +2431,835 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg) Most other reasons are not valid. 
*/ #define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd) -struct ioctl_trans ioctl_start[] = { -#include <linux/compat_ioctl.h> +static struct ioctl_trans ioctl_start[] = { +/* compatible ioctls first */ +COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ +COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ + +/* Big T */ +COMPATIBLE_IOCTL(TCGETA) +COMPATIBLE_IOCTL(TCSETA) +COMPATIBLE_IOCTL(TCSETAW) +COMPATIBLE_IOCTL(TCSETAF) +COMPATIBLE_IOCTL(TCSBRK) +ULONG_IOCTL(TCSBRKP) +COMPATIBLE_IOCTL(TCXONC) +COMPATIBLE_IOCTL(TCFLSH) +COMPATIBLE_IOCTL(TCGETS) +COMPATIBLE_IOCTL(TCSETS) +COMPATIBLE_IOCTL(TCSETSW) +COMPATIBLE_IOCTL(TCSETSF) +COMPATIBLE_IOCTL(TIOCLINUX) +COMPATIBLE_IOCTL(TIOCSBRK) +COMPATIBLE_IOCTL(TIOCCBRK) +ULONG_IOCTL(TIOCMIWAIT) +COMPATIBLE_IOCTL(TIOCGICOUNT) +/* Little t */ +COMPATIBLE_IOCTL(TIOCGETD) +COMPATIBLE_IOCTL(TIOCSETD) +COMPATIBLE_IOCTL(TIOCEXCL) +COMPATIBLE_IOCTL(TIOCNXCL) +COMPATIBLE_IOCTL(TIOCCONS) +COMPATIBLE_IOCTL(TIOCGSOFTCAR) +COMPATIBLE_IOCTL(TIOCSSOFTCAR) +COMPATIBLE_IOCTL(TIOCSWINSZ) +COMPATIBLE_IOCTL(TIOCGWINSZ) +COMPATIBLE_IOCTL(TIOCMGET) +COMPATIBLE_IOCTL(TIOCMBIC) +COMPATIBLE_IOCTL(TIOCMBIS) +COMPATIBLE_IOCTL(TIOCMSET) +COMPATIBLE_IOCTL(TIOCPKT) +COMPATIBLE_IOCTL(TIOCNOTTY) +COMPATIBLE_IOCTL(TIOCSTI) +COMPATIBLE_IOCTL(TIOCOUTQ) +COMPATIBLE_IOCTL(TIOCSPGRP) +COMPATIBLE_IOCTL(TIOCGPGRP) +ULONG_IOCTL(TIOCSCTTY) +COMPATIBLE_IOCTL(TIOCGPTN) +COMPATIBLE_IOCTL(TIOCSPTLCK) +COMPATIBLE_IOCTL(TIOCSERGETLSR) +/* Little f */ +COMPATIBLE_IOCTL(FIOCLEX) +COMPATIBLE_IOCTL(FIONCLEX) +COMPATIBLE_IOCTL(FIOASYNC) +COMPATIBLE_IOCTL(FIONBIO) +COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ +/* 0x00 */ +COMPATIBLE_IOCTL(FIBMAP) +COMPATIBLE_IOCTL(FIGETBSZ) +/* 0x03 -- HD/IDE ioctl's used by hdparm and friends. + * Some need translations, these do not. 
+ */ +COMPATIBLE_IOCTL(HDIO_GET_IDENTITY) +COMPATIBLE_IOCTL(HDIO_DRIVE_TASK) +COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) +ULONG_IOCTL(HDIO_SET_MULTCOUNT) +ULONG_IOCTL(HDIO_SET_UNMASKINTR) +ULONG_IOCTL(HDIO_SET_KEEPSETTINGS) +ULONG_IOCTL(HDIO_SET_32BIT) +ULONG_IOCTL(HDIO_SET_NOWERR) +ULONG_IOCTL(HDIO_SET_DMA) +ULONG_IOCTL(HDIO_SET_PIO_MODE) +ULONG_IOCTL(HDIO_SET_NICE) +ULONG_IOCTL(HDIO_SET_WCACHE) +ULONG_IOCTL(HDIO_SET_ACOUSTIC) +ULONG_IOCTL(HDIO_SET_BUSSTATE) +ULONG_IOCTL(HDIO_SET_ADDRESS) +COMPATIBLE_IOCTL(HDIO_SCAN_HWIF) +/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ +COMPATIBLE_IOCTL(0x330) +/* 0x02 -- Floppy ioctls */ +COMPATIBLE_IOCTL(FDMSGON) +COMPATIBLE_IOCTL(FDMSGOFF) +COMPATIBLE_IOCTL(FDSETEMSGTRESH) +COMPATIBLE_IOCTL(FDFLUSH) +COMPATIBLE_IOCTL(FDWERRORCLR) +COMPATIBLE_IOCTL(FDSETMAXERRS) +COMPATIBLE_IOCTL(FDGETMAXERRS) +COMPATIBLE_IOCTL(FDGETDRVTYP) +COMPATIBLE_IOCTL(FDEJECT) +COMPATIBLE_IOCTL(FDCLRPRM) +COMPATIBLE_IOCTL(FDFMTBEG) +COMPATIBLE_IOCTL(FDFMTEND) +COMPATIBLE_IOCTL(FDRESET) +COMPATIBLE_IOCTL(FDTWADDLE) +COMPATIBLE_IOCTL(FDFMTTRK) +COMPATIBLE_IOCTL(FDRAWCMD) +/* 0x12 */ +#ifdef CONFIG_BLOCK +COMPATIBLE_IOCTL(BLKRASET) +COMPATIBLE_IOCTL(BLKROSET) +COMPATIBLE_IOCTL(BLKROGET) +COMPATIBLE_IOCTL(BLKRRPART) +COMPATIBLE_IOCTL(BLKFLSBUF) +COMPATIBLE_IOCTL(BLKSECTSET) +COMPATIBLE_IOCTL(BLKSSZGET) +COMPATIBLE_IOCTL(BLKTRACESTART) +COMPATIBLE_IOCTL(BLKTRACESTOP) +COMPATIBLE_IOCTL(BLKTRACESETUP) +COMPATIBLE_IOCTL(BLKTRACETEARDOWN) +ULONG_IOCTL(BLKRASET) +ULONG_IOCTL(BLKFRASET) +#endif +/* RAID */ +COMPATIBLE_IOCTL(RAID_VERSION) +COMPATIBLE_IOCTL(GET_ARRAY_INFO) +COMPATIBLE_IOCTL(GET_DISK_INFO) +COMPATIBLE_IOCTL(PRINT_RAID_DEBUG) +COMPATIBLE_IOCTL(RAID_AUTORUN) +COMPATIBLE_IOCTL(CLEAR_ARRAY) +COMPATIBLE_IOCTL(ADD_NEW_DISK) +ULONG_IOCTL(HOT_REMOVE_DISK) +COMPATIBLE_IOCTL(SET_ARRAY_INFO) +COMPATIBLE_IOCTL(SET_DISK_INFO) +COMPATIBLE_IOCTL(WRITE_RAID_INFO) +COMPATIBLE_IOCTL(UNPROTECT_ARRAY) +COMPATIBLE_IOCTL(PROTECT_ARRAY) +ULONG_IOCTL(HOT_ADD_DISK) 
+ULONG_IOCTL(SET_DISK_FAULTY) +COMPATIBLE_IOCTL(RUN_ARRAY) +COMPATIBLE_IOCTL(STOP_ARRAY) +COMPATIBLE_IOCTL(STOP_ARRAY_RO) +COMPATIBLE_IOCTL(RESTART_ARRAY_RW) +COMPATIBLE_IOCTL(GET_BITMAP_FILE) +ULONG_IOCTL(SET_BITMAP_FILE) +/* DM */ +COMPATIBLE_IOCTL(DM_VERSION_32) +COMPATIBLE_IOCTL(DM_REMOVE_ALL_32) +COMPATIBLE_IOCTL(DM_LIST_DEVICES_32) +COMPATIBLE_IOCTL(DM_DEV_CREATE_32) +COMPATIBLE_IOCTL(DM_DEV_REMOVE_32) +COMPATIBLE_IOCTL(DM_DEV_RENAME_32) +COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32) +COMPATIBLE_IOCTL(DM_DEV_STATUS_32) +COMPATIBLE_IOCTL(DM_DEV_WAIT_32) +COMPATIBLE_IOCTL(DM_TABLE_LOAD_32) +COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32) +COMPATIBLE_IOCTL(DM_TABLE_DEPS_32) +COMPATIBLE_IOCTL(DM_TABLE_STATUS_32) +COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32) +COMPATIBLE_IOCTL(DM_TARGET_MSG_32) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32) +COMPATIBLE_IOCTL(DM_VERSION) +COMPATIBLE_IOCTL(DM_REMOVE_ALL) +COMPATIBLE_IOCTL(DM_LIST_DEVICES) +COMPATIBLE_IOCTL(DM_DEV_CREATE) +COMPATIBLE_IOCTL(DM_DEV_REMOVE) +COMPATIBLE_IOCTL(DM_DEV_RENAME) +COMPATIBLE_IOCTL(DM_DEV_SUSPEND) +COMPATIBLE_IOCTL(DM_DEV_STATUS) +COMPATIBLE_IOCTL(DM_DEV_WAIT) +COMPATIBLE_IOCTL(DM_TABLE_LOAD) +COMPATIBLE_IOCTL(DM_TABLE_CLEAR) +COMPATIBLE_IOCTL(DM_TABLE_DEPS) +COMPATIBLE_IOCTL(DM_TABLE_STATUS) +COMPATIBLE_IOCTL(DM_LIST_VERSIONS) +COMPATIBLE_IOCTL(DM_TARGET_MSG) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY) +/* Big K */ +COMPATIBLE_IOCTL(PIO_FONT) +COMPATIBLE_IOCTL(GIO_FONT) +ULONG_IOCTL(KDSIGACCEPT) +COMPATIBLE_IOCTL(KDGETKEYCODE) +COMPATIBLE_IOCTL(KDSETKEYCODE) +ULONG_IOCTL(KIOCSOUND) +ULONG_IOCTL(KDMKTONE) +COMPATIBLE_IOCTL(KDGKBTYPE) +ULONG_IOCTL(KDSETMODE) +COMPATIBLE_IOCTL(KDGETMODE) +ULONG_IOCTL(KDSKBMODE) +COMPATIBLE_IOCTL(KDGKBMODE) +ULONG_IOCTL(KDSKBMETA) +COMPATIBLE_IOCTL(KDGKBMETA) +COMPATIBLE_IOCTL(KDGKBENT) +COMPATIBLE_IOCTL(KDSKBENT) +COMPATIBLE_IOCTL(KDGKBSENT) +COMPATIBLE_IOCTL(KDSKBSENT) +COMPATIBLE_IOCTL(KDGKBDIACR) +COMPATIBLE_IOCTL(KDSKBDIACR) +COMPATIBLE_IOCTL(KDKBDREP) +COMPATIBLE_IOCTL(KDGKBLED) 
+ULONG_IOCTL(KDSKBLED) +COMPATIBLE_IOCTL(KDGETLED) +ULONG_IOCTL(KDSETLED) +COMPATIBLE_IOCTL(GIO_SCRNMAP) +COMPATIBLE_IOCTL(PIO_SCRNMAP) +COMPATIBLE_IOCTL(GIO_UNISCRNMAP) +COMPATIBLE_IOCTL(PIO_UNISCRNMAP) +COMPATIBLE_IOCTL(PIO_FONTRESET) +COMPATIBLE_IOCTL(PIO_UNIMAPCLR) +/* Big S */ +COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) +COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK) +COMPATIBLE_IOCTL(SCSI_IOCTL_DOORUNLOCK) +COMPATIBLE_IOCTL(SCSI_IOCTL_TEST_UNIT_READY) +COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER) +COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND) +COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) +COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) +/* Big T */ +COMPATIBLE_IOCTL(TUNSETNOCSUM) +COMPATIBLE_IOCTL(TUNSETDEBUG) +COMPATIBLE_IOCTL(TUNSETPERSIST) +COMPATIBLE_IOCTL(TUNSETOWNER) +/* Big V */ +COMPATIBLE_IOCTL(VT_SETMODE) +COMPATIBLE_IOCTL(VT_GETMODE) +COMPATIBLE_IOCTL(VT_GETSTATE) +COMPATIBLE_IOCTL(VT_OPENQRY) +ULONG_IOCTL(VT_ACTIVATE) +ULONG_IOCTL(VT_WAITACTIVE) +ULONG_IOCTL(VT_RELDISP) +ULONG_IOCTL(VT_DISALLOCATE) +COMPATIBLE_IOCTL(VT_RESIZE) +COMPATIBLE_IOCTL(VT_RESIZEX) +COMPATIBLE_IOCTL(VT_LOCKSWITCH) +COMPATIBLE_IOCTL(VT_UNLOCKSWITCH) +COMPATIBLE_IOCTL(VT_GETHIFONTMASK) +/* Little p (/dev/rtc, /dev/envctrl, etc.) */ +COMPATIBLE_IOCTL(RTC_AIE_ON) +COMPATIBLE_IOCTL(RTC_AIE_OFF) +COMPATIBLE_IOCTL(RTC_UIE_ON) +COMPATIBLE_IOCTL(RTC_UIE_OFF) +COMPATIBLE_IOCTL(RTC_PIE_ON) +COMPATIBLE_IOCTL(RTC_PIE_OFF) +COMPATIBLE_IOCTL(RTC_WIE_ON) +COMPATIBLE_IOCTL(RTC_WIE_OFF) +COMPATIBLE_IOCTL(RTC_ALM_SET) +COMPATIBLE_IOCTL(RTC_ALM_READ) +COMPATIBLE_IOCTL(RTC_RD_TIME) +COMPATIBLE_IOCTL(RTC_SET_TIME) +COMPATIBLE_IOCTL(RTC_WKALM_SET) +COMPATIBLE_IOCTL(RTC_WKALM_RD) +/* + * These two are only for the sbus rtc driver, but + * hwclock tries them on every rtc device first when + * running on sparc. On other architectures the entries + * are useless but harmless. 
+ */ +COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */ +COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */ +/* Little m */ +COMPATIBLE_IOCTL(MTIOCTOP) +/* Socket level stuff */ +COMPATIBLE_IOCTL(FIOQSIZE) +COMPATIBLE_IOCTL(FIOSETOWN) +COMPATIBLE_IOCTL(SIOCSPGRP) +COMPATIBLE_IOCTL(FIOGETOWN) +COMPATIBLE_IOCTL(SIOCGPGRP) +COMPATIBLE_IOCTL(SIOCATMARK) +COMPATIBLE_IOCTL(SIOCSIFLINK) +COMPATIBLE_IOCTL(SIOCSIFENCAP) +COMPATIBLE_IOCTL(SIOCGIFENCAP) +COMPATIBLE_IOCTL(SIOCSIFNAME) +COMPATIBLE_IOCTL(SIOCSARP) +COMPATIBLE_IOCTL(SIOCGARP) +COMPATIBLE_IOCTL(SIOCDARP) +COMPATIBLE_IOCTL(SIOCSRARP) +COMPATIBLE_IOCTL(SIOCGRARP) +COMPATIBLE_IOCTL(SIOCDRARP) +COMPATIBLE_IOCTL(SIOCADDDLCI) +COMPATIBLE_IOCTL(SIOCDELDLCI) +COMPATIBLE_IOCTL(SIOCGMIIPHY) +COMPATIBLE_IOCTL(SIOCGMIIREG) +COMPATIBLE_IOCTL(SIOCSMIIREG) +COMPATIBLE_IOCTL(SIOCGIFVLAN) +COMPATIBLE_IOCTL(SIOCSIFVLAN) +COMPATIBLE_IOCTL(SIOCBRADDBR) +COMPATIBLE_IOCTL(SIOCBRDELBR) +/* SG stuff */ +COMPATIBLE_IOCTL(SG_SET_TIMEOUT) +COMPATIBLE_IOCTL(SG_GET_TIMEOUT) +COMPATIBLE_IOCTL(SG_EMULATED_HOST) +ULONG_IOCTL(SG_SET_TRANSFORM) +COMPATIBLE_IOCTL(SG_GET_TRANSFORM) +COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE) +COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE) +COMPATIBLE_IOCTL(SG_GET_SCSI_ID) +COMPATIBLE_IOCTL(SG_SET_FORCE_LOW_DMA) +COMPATIBLE_IOCTL(SG_GET_LOW_DMA) +COMPATIBLE_IOCTL(SG_SET_FORCE_PACK_ID) +COMPATIBLE_IOCTL(SG_GET_PACK_ID) +COMPATIBLE_IOCTL(SG_GET_NUM_WAITING) +COMPATIBLE_IOCTL(SG_SET_DEBUG) +COMPATIBLE_IOCTL(SG_GET_SG_TABLESIZE) +COMPATIBLE_IOCTL(SG_GET_COMMAND_Q) +COMPATIBLE_IOCTL(SG_SET_COMMAND_Q) +COMPATIBLE_IOCTL(SG_GET_VERSION_NUM) +COMPATIBLE_IOCTL(SG_NEXT_CMD_LEN) +COMPATIBLE_IOCTL(SG_SCSI_RESET) +COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE) +COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN) +COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN) +/* PPP stuff */ +COMPATIBLE_IOCTL(PPPIOCGFLAGS) +COMPATIBLE_IOCTL(PPPIOCSFLAGS) +COMPATIBLE_IOCTL(PPPIOCGASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCSASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCGUNIT) 
+COMPATIBLE_IOCTL(PPPIOCGRASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCSRASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCGMRU) +COMPATIBLE_IOCTL(PPPIOCSMRU) +COMPATIBLE_IOCTL(PPPIOCSMAXCID) +COMPATIBLE_IOCTL(PPPIOCGXASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCSXASYNCMAP) +COMPATIBLE_IOCTL(PPPIOCXFERUNIT) +/* PPPIOCSCOMPRESS is translated */ +COMPATIBLE_IOCTL(PPPIOCGNPMODE) +COMPATIBLE_IOCTL(PPPIOCSNPMODE) +COMPATIBLE_IOCTL(PPPIOCGDEBUG) +COMPATIBLE_IOCTL(PPPIOCSDEBUG) +/* PPPIOCSPASS is translated */ +/* PPPIOCSACTIVE is translated */ +/* PPPIOCGIDLE is translated */ +COMPATIBLE_IOCTL(PPPIOCNEWUNIT) +COMPATIBLE_IOCTL(PPPIOCATTACH) +COMPATIBLE_IOCTL(PPPIOCDETACH) +COMPATIBLE_IOCTL(PPPIOCSMRRU) +COMPATIBLE_IOCTL(PPPIOCCONNECT) +COMPATIBLE_IOCTL(PPPIOCDISCONN) +COMPATIBLE_IOCTL(PPPIOCATTCHAN) +COMPATIBLE_IOCTL(PPPIOCGCHAN) +/* PPPOX */ +COMPATIBLE_IOCTL(PPPOEIOCSFWD) +COMPATIBLE_IOCTL(PPPOEIOCDFWD) +/* LP */ +COMPATIBLE_IOCTL(LPGETSTATUS) +/* ppdev */ +COMPATIBLE_IOCTL(PPSETMODE) +COMPATIBLE_IOCTL(PPRSTATUS) +COMPATIBLE_IOCTL(PPRCONTROL) +COMPATIBLE_IOCTL(PPWCONTROL) +COMPATIBLE_IOCTL(PPFCONTROL) +COMPATIBLE_IOCTL(PPRDATA) +COMPATIBLE_IOCTL(PPWDATA) +COMPATIBLE_IOCTL(PPCLAIM) +COMPATIBLE_IOCTL(PPRELEASE) +COMPATIBLE_IOCTL(PPYIELD) +COMPATIBLE_IOCTL(PPEXCL) +COMPATIBLE_IOCTL(PPDATADIR) +COMPATIBLE_IOCTL(PPNEGOT) +COMPATIBLE_IOCTL(PPWCTLONIRQ) +COMPATIBLE_IOCTL(PPCLRIRQ) +COMPATIBLE_IOCTL(PPSETPHASE) +COMPATIBLE_IOCTL(PPGETMODES) +COMPATIBLE_IOCTL(PPGETMODE) +COMPATIBLE_IOCTL(PPGETPHASE) +COMPATIBLE_IOCTL(PPGETFLAGS) +COMPATIBLE_IOCTL(PPSETFLAGS) +/* CDROM stuff */ +COMPATIBLE_IOCTL(CDROMPAUSE) +COMPATIBLE_IOCTL(CDROMRESUME) +COMPATIBLE_IOCTL(CDROMPLAYMSF) +COMPATIBLE_IOCTL(CDROMPLAYTRKIND) +COMPATIBLE_IOCTL(CDROMREADTOCHDR) +COMPATIBLE_IOCTL(CDROMREADTOCENTRY) +COMPATIBLE_IOCTL(CDROMSTOP) +COMPATIBLE_IOCTL(CDROMSTART) +COMPATIBLE_IOCTL(CDROMEJECT) +COMPATIBLE_IOCTL(CDROMVOLCTRL) +COMPATIBLE_IOCTL(CDROMSUBCHNL) +ULONG_IOCTL(CDROMEJECT_SW) +COMPATIBLE_IOCTL(CDROMMULTISESSION) 
+COMPATIBLE_IOCTL(CDROM_GET_MCN) +COMPATIBLE_IOCTL(CDROMRESET) +COMPATIBLE_IOCTL(CDROMVOLREAD) +COMPATIBLE_IOCTL(CDROMSEEK) +COMPATIBLE_IOCTL(CDROMPLAYBLK) +COMPATIBLE_IOCTL(CDROMCLOSETRAY) +ULONG_IOCTL(CDROM_SET_OPTIONS) +ULONG_IOCTL(CDROM_CLEAR_OPTIONS) +ULONG_IOCTL(CDROM_SELECT_SPEED) +ULONG_IOCTL(CDROM_SELECT_DISC) +ULONG_IOCTL(CDROM_MEDIA_CHANGED) +ULONG_IOCTL(CDROM_DRIVE_STATUS) +COMPATIBLE_IOCTL(CDROM_DISC_STATUS) +COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS) +ULONG_IOCTL(CDROM_LOCKDOOR) +ULONG_IOCTL(CDROM_DEBUG) +COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY) +/* Ignore cdrom.h about these next 5 ioctls, they absolutely do + * not take a struct cdrom_read, instead they take a struct cdrom_msf + * which is compatible. + */ +COMPATIBLE_IOCTL(CDROMREADMODE2) +COMPATIBLE_IOCTL(CDROMREADMODE1) +COMPATIBLE_IOCTL(CDROMREADRAW) +COMPATIBLE_IOCTL(CDROMREADCOOKED) +COMPATIBLE_IOCTL(CDROMREADALL) +/* DVD ioctls */ +COMPATIBLE_IOCTL(DVD_READ_STRUCT) +COMPATIBLE_IOCTL(DVD_WRITE_STRUCT) +COMPATIBLE_IOCTL(DVD_AUTH) +/* pktcdvd */ +COMPATIBLE_IOCTL(PACKET_CTRL_CMD) +/* Big A */ +/* sparc only */ +/* Big Q for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_SEQ_RESET) +COMPATIBLE_IOCTL(SNDCTL_SEQ_SYNC) +COMPATIBLE_IOCTL(SNDCTL_SYNTH_INFO) +COMPATIBLE_IOCTL(SNDCTL_SEQ_CTRLRATE) +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETOUTCOUNT) +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETINCOUNT) +COMPATIBLE_IOCTL(SNDCTL_SEQ_PERCMODE) +COMPATIBLE_IOCTL(SNDCTL_FM_LOAD_INSTR) +COMPATIBLE_IOCTL(SNDCTL_SEQ_TESTMIDI) +COMPATIBLE_IOCTL(SNDCTL_SEQ_RESETSAMPLES) +COMPATIBLE_IOCTL(SNDCTL_SEQ_NRSYNTHS) +COMPATIBLE_IOCTL(SNDCTL_SEQ_NRMIDIS) +COMPATIBLE_IOCTL(SNDCTL_MIDI_INFO) +COMPATIBLE_IOCTL(SNDCTL_SEQ_THRESHOLD) +COMPATIBLE_IOCTL(SNDCTL_SYNTH_MEMAVL) +COMPATIBLE_IOCTL(SNDCTL_FM_4OP_ENABLE) +COMPATIBLE_IOCTL(SNDCTL_SEQ_PANIC) +COMPATIBLE_IOCTL(SNDCTL_SEQ_OUTOFBAND) +COMPATIBLE_IOCTL(SNDCTL_SEQ_GETTIME) +COMPATIBLE_IOCTL(SNDCTL_SYNTH_ID) +COMPATIBLE_IOCTL(SNDCTL_SYNTH_CONTROL) +COMPATIBLE_IOCTL(SNDCTL_SYNTH_REMOVESAMPLE) +/* Big T for sound/OSS 
*/ +COMPATIBLE_IOCTL(SNDCTL_TMR_TIMEBASE) +COMPATIBLE_IOCTL(SNDCTL_TMR_START) +COMPATIBLE_IOCTL(SNDCTL_TMR_STOP) +COMPATIBLE_IOCTL(SNDCTL_TMR_CONTINUE) +COMPATIBLE_IOCTL(SNDCTL_TMR_TEMPO) +COMPATIBLE_IOCTL(SNDCTL_TMR_SOURCE) +COMPATIBLE_IOCTL(SNDCTL_TMR_METRONOME) +COMPATIBLE_IOCTL(SNDCTL_TMR_SELECT) +/* Little m for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_MIDI_PRETIME) +COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUMODE) +COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUCMD) +/* Big P for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_DSP_RESET) +COMPATIBLE_IOCTL(SNDCTL_DSP_SYNC) +COMPATIBLE_IOCTL(SNDCTL_DSP_SPEED) +COMPATIBLE_IOCTL(SNDCTL_DSP_STEREO) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETBLKSIZE) +COMPATIBLE_IOCTL(SNDCTL_DSP_CHANNELS) +COMPATIBLE_IOCTL(SOUND_PCM_WRITE_FILTER) +COMPATIBLE_IOCTL(SNDCTL_DSP_POST) +COMPATIBLE_IOCTL(SNDCTL_DSP_SUBDIVIDE) +COMPATIBLE_IOCTL(SNDCTL_DSP_SETFRAGMENT) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETFMTS) +COMPATIBLE_IOCTL(SNDCTL_DSP_SETFMT) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETOSPACE) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETISPACE) +COMPATIBLE_IOCTL(SNDCTL_DSP_NONBLOCK) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETCAPS) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETTRIGGER) +COMPATIBLE_IOCTL(SNDCTL_DSP_SETTRIGGER) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETIPTR) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETOPTR) +/* SNDCTL_DSP_MAPINBUF, XXX needs translation */ +/* SNDCTL_DSP_MAPOUTBUF, XXX needs translation */ +COMPATIBLE_IOCTL(SNDCTL_DSP_SETSYNCRO) +COMPATIBLE_IOCTL(SNDCTL_DSP_SETDUPLEX) +COMPATIBLE_IOCTL(SNDCTL_DSP_GETODELAY) +COMPATIBLE_IOCTL(SNDCTL_DSP_PROFILE) +COMPATIBLE_IOCTL(SOUND_PCM_READ_RATE) +COMPATIBLE_IOCTL(SOUND_PCM_READ_CHANNELS) +COMPATIBLE_IOCTL(SOUND_PCM_READ_BITS) +COMPATIBLE_IOCTL(SOUND_PCM_READ_FILTER) +/* Big C for sound/OSS */ +COMPATIBLE_IOCTL(SNDCTL_COPR_RESET) +COMPATIBLE_IOCTL(SNDCTL_COPR_LOAD) +COMPATIBLE_IOCTL(SNDCTL_COPR_RDATA) +COMPATIBLE_IOCTL(SNDCTL_COPR_RCODE) +COMPATIBLE_IOCTL(SNDCTL_COPR_WDATA) +COMPATIBLE_IOCTL(SNDCTL_COPR_WCODE) +COMPATIBLE_IOCTL(SNDCTL_COPR_RUN) +COMPATIBLE_IOCTL(SNDCTL_COPR_HALT) 
+COMPATIBLE_IOCTL(SNDCTL_COPR_SENDMSG) +COMPATIBLE_IOCTL(SNDCTL_COPR_RCVMSG) +/* Big M for sound/OSS */ +COMPATIBLE_IOCTL(SOUND_MIXER_READ_VOLUME) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_BASS) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_TREBLE) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_SYNTH) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_PCM) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_SPEAKER) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_MIC) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_CD) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_IMIX) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_ALTPCM) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECLEV) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_IGAIN) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_OGAIN) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE1) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE2) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE3) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL1)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL2)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL3)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEIN)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEOUT)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_VIDEO)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_RADIO)) +COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_MONITOR)) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_MUTE) +/* SOUND_MIXER_READ_ENHANCE, same value as READ_MUTE */ +/* SOUND_MIXER_READ_LOUD, same value as READ_MUTE */ +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECSRC) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_DEVMASK) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECMASK) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_STEREODEVS) +COMPATIBLE_IOCTL(SOUND_MIXER_READ_CAPS) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_VOLUME) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_BASS) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_TREBLE) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SYNTH) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_PCM) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SPEAKER) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MIC) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_CD) 
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IMIX) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_ALTPCM) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECLEV) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IGAIN) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_OGAIN) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE1) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE2) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE3) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL1)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL2)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL3)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEIN)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEOUT)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_VIDEO)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_RADIO)) +COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_MONITOR)) +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MUTE) +/* SOUND_MIXER_WRITE_ENHANCE, same value as WRITE_MUTE */ +/* SOUND_MIXER_WRITE_LOUD, same value as WRITE_MUTE */ +COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECSRC) +COMPATIBLE_IOCTL(SOUND_MIXER_INFO) +COMPATIBLE_IOCTL(SOUND_OLD_MIXER_INFO) +COMPATIBLE_IOCTL(SOUND_MIXER_ACCESS) +COMPATIBLE_IOCTL(SOUND_MIXER_AGC) +COMPATIBLE_IOCTL(SOUND_MIXER_3DSE) +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE1) +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE2) +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE3) +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE4) +COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5) +COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) +COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) +COMPATIBLE_IOCTL(OSS_GETVERSION) +/* AUTOFS */ +ULONG_IOCTL(AUTOFS_IOC_READY) +ULONG_IOCTL(AUTOFS_IOC_FAIL) +COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) +COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) +COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) +COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) +COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) +COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST) +COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST) +COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) +/* Raw devices */ +COMPATIBLE_IOCTL(RAW_SETBIND) +COMPATIBLE_IOCTL(RAW_GETBIND) +/* SMB ioctls which do not need any translations */ 
+COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) +/* Little a */ +COMPATIBLE_IOCTL(ATMSIGD_CTRL) +COMPATIBLE_IOCTL(ATMARPD_CTRL) +COMPATIBLE_IOCTL(ATMLEC_CTRL) +COMPATIBLE_IOCTL(ATMLEC_MCAST) +COMPATIBLE_IOCTL(ATMLEC_DATA) +COMPATIBLE_IOCTL(ATM_SETSC) +COMPATIBLE_IOCTL(SIOCSIFATMTCP) +COMPATIBLE_IOCTL(SIOCMKCLIP) +COMPATIBLE_IOCTL(ATMARP_MKIP) +COMPATIBLE_IOCTL(ATMARP_SETENTRY) +COMPATIBLE_IOCTL(ATMARP_ENCAP) +COMPATIBLE_IOCTL(ATMTCP_CREATE) +COMPATIBLE_IOCTL(ATMTCP_REMOVE) +COMPATIBLE_IOCTL(ATMMPC_CTRL) +COMPATIBLE_IOCTL(ATMMPC_DATA) +/* Watchdog */ +COMPATIBLE_IOCTL(WDIOC_GETSUPPORT) +COMPATIBLE_IOCTL(WDIOC_GETSTATUS) +COMPATIBLE_IOCTL(WDIOC_GETBOOTSTATUS) +COMPATIBLE_IOCTL(WDIOC_GETTEMP) +COMPATIBLE_IOCTL(WDIOC_SETOPTIONS) +COMPATIBLE_IOCTL(WDIOC_KEEPALIVE) +COMPATIBLE_IOCTL(WDIOC_SETTIMEOUT) +COMPATIBLE_IOCTL(WDIOC_GETTIMEOUT) +/* Big R */ +COMPATIBLE_IOCTL(RNDGETENTCNT) +COMPATIBLE_IOCTL(RNDADDTOENTCNT) +COMPATIBLE_IOCTL(RNDGETPOOL) +COMPATIBLE_IOCTL(RNDADDENTROPY) +COMPATIBLE_IOCTL(RNDZAPENTCNT) +COMPATIBLE_IOCTL(RNDCLEARPOOL) +/* Bluetooth */ +COMPATIBLE_IOCTL(HCIDEVUP) +COMPATIBLE_IOCTL(HCIDEVDOWN) +COMPATIBLE_IOCTL(HCIDEVRESET) +COMPATIBLE_IOCTL(HCIDEVRESTAT) +COMPATIBLE_IOCTL(HCIGETDEVLIST) +COMPATIBLE_IOCTL(HCIGETDEVINFO) +COMPATIBLE_IOCTL(HCIGETCONNLIST) +COMPATIBLE_IOCTL(HCIGETCONNINFO) +COMPATIBLE_IOCTL(HCISETRAW) +COMPATIBLE_IOCTL(HCISETSCAN) +COMPATIBLE_IOCTL(HCISETAUTH) +COMPATIBLE_IOCTL(HCISETENCRYPT) +COMPATIBLE_IOCTL(HCISETPTYPE) +COMPATIBLE_IOCTL(HCISETLINKPOL) +COMPATIBLE_IOCTL(HCISETLINKMODE) +COMPATIBLE_IOCTL(HCISETACLMTU) +COMPATIBLE_IOCTL(HCISETSCOMTU) +COMPATIBLE_IOCTL(HCIINQUIRY) +COMPATIBLE_IOCTL(HCIUARTSETPROTO) +COMPATIBLE_IOCTL(HCIUARTGETPROTO) +COMPATIBLE_IOCTL(RFCOMMCREATEDEV) +COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) +COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) +COMPATIBLE_IOCTL(RFCOMMGETDEVINFO) +COMPATIBLE_IOCTL(RFCOMMSTEALDLC) +COMPATIBLE_IOCTL(BNEPCONNADD) +COMPATIBLE_IOCTL(BNEPCONNDEL) +COMPATIBLE_IOCTL(BNEPGETCONNLIST) +COMPATIBLE_IOCTL(BNEPGETCONNINFO) 
+COMPATIBLE_IOCTL(CMTPCONNADD) +COMPATIBLE_IOCTL(CMTPCONNDEL) +COMPATIBLE_IOCTL(CMTPGETCONNLIST) +COMPATIBLE_IOCTL(CMTPGETCONNINFO) +COMPATIBLE_IOCTL(HIDPCONNADD) +COMPATIBLE_IOCTL(HIDPCONNDEL) +COMPATIBLE_IOCTL(HIDPGETCONNLIST) +COMPATIBLE_IOCTL(HIDPGETCONNINFO) +/* CAPI */ +COMPATIBLE_IOCTL(CAPI_REGISTER) +COMPATIBLE_IOCTL(CAPI_GET_MANUFACTURER) +COMPATIBLE_IOCTL(CAPI_GET_VERSION) +COMPATIBLE_IOCTL(CAPI_GET_SERIAL) +COMPATIBLE_IOCTL(CAPI_GET_PROFILE) +COMPATIBLE_IOCTL(CAPI_MANUFACTURER_CMD) +COMPATIBLE_IOCTL(CAPI_GET_ERRCODE) +COMPATIBLE_IOCTL(CAPI_INSTALLED) +COMPATIBLE_IOCTL(CAPI_GET_FLAGS) +COMPATIBLE_IOCTL(CAPI_SET_FLAGS) +COMPATIBLE_IOCTL(CAPI_CLR_FLAGS) +COMPATIBLE_IOCTL(CAPI_NCCI_OPENCOUNT) +COMPATIBLE_IOCTL(CAPI_NCCI_GETUNIT) +/* Siemens Gigaset */ +COMPATIBLE_IOCTL(GIGASET_REDIR) +COMPATIBLE_IOCTL(GIGASET_CONFIG) +COMPATIBLE_IOCTL(GIGASET_BRKCHARS) +COMPATIBLE_IOCTL(GIGASET_VERSION) +/* Misc. */ +COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */ +COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */ +COMPATIBLE_IOCTL(PCIIOC_CONTROLLER) +COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO) +COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM) +COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE) +/* USB */ +COMPATIBLE_IOCTL(USBDEVFS_RESETEP) +COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE) +COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION) +COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER) +COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB) +COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE) +COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE) +COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO) +COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO) +COMPATIBLE_IOCTL(USBDEVFS_RESET) +COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32) +COMPATIBLE_IOCTL(USBDEVFS_REAPURB32) +COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32) +COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT) +/* MTD */ +COMPATIBLE_IOCTL(MEMGETINFO) +COMPATIBLE_IOCTL(MEMERASE) +COMPATIBLE_IOCTL(MEMLOCK) +COMPATIBLE_IOCTL(MEMUNLOCK) +COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) +COMPATIBLE_IOCTL(MEMGETREGIONINFO) +COMPATIBLE_IOCTL(MEMGETBADBLOCK) 
+COMPATIBLE_IOCTL(MEMSETBADBLOCK) +/* NBD */ +ULONG_IOCTL(NBD_SET_SOCK) +ULONG_IOCTL(NBD_SET_BLKSIZE) +ULONG_IOCTL(NBD_SET_SIZE) +COMPATIBLE_IOCTL(NBD_DO_IT) +COMPATIBLE_IOCTL(NBD_CLEAR_SOCK) +COMPATIBLE_IOCTL(NBD_CLEAR_QUE) +COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) +ULONG_IOCTL(NBD_SET_SIZE_BLOCKS) +COMPATIBLE_IOCTL(NBD_DISCONNECT) +/* i2c */ +COMPATIBLE_IOCTL(I2C_SLAVE) +COMPATIBLE_IOCTL(I2C_SLAVE_FORCE) +COMPATIBLE_IOCTL(I2C_TENBIT) +COMPATIBLE_IOCTL(I2C_PEC) +COMPATIBLE_IOCTL(I2C_RETRIES) +COMPATIBLE_IOCTL(I2C_TIMEOUT) +/* wireless */ +COMPATIBLE_IOCTL(SIOCSIWCOMMIT) +COMPATIBLE_IOCTL(SIOCGIWNAME) +COMPATIBLE_IOCTL(SIOCSIWNWID) +COMPATIBLE_IOCTL(SIOCGIWNWID) +COMPATIBLE_IOCTL(SIOCSIWFREQ) +COMPATIBLE_IOCTL(SIOCGIWFREQ) +COMPATIBLE_IOCTL(SIOCSIWMODE) +COMPATIBLE_IOCTL(SIOCGIWMODE) +COMPATIBLE_IOCTL(SIOCSIWSENS) +COMPATIBLE_IOCTL(SIOCGIWSENS) +COMPATIBLE_IOCTL(SIOCSIWRANGE) +COMPATIBLE_IOCTL(SIOCSIWPRIV) +COMPATIBLE_IOCTL(SIOCGIWPRIV) +COMPATIBLE_IOCTL(SIOCSIWSTATS) +COMPATIBLE_IOCTL(SIOCGIWSTATS) +COMPATIBLE_IOCTL(SIOCSIWAP) +COMPATIBLE_IOCTL(SIOCGIWAP) +COMPATIBLE_IOCTL(SIOCSIWSCAN) +COMPATIBLE_IOCTL(SIOCSIWRATE) +COMPATIBLE_IOCTL(SIOCGIWRATE) +COMPATIBLE_IOCTL(SIOCSIWRTS) +COMPATIBLE_IOCTL(SIOCGIWRTS) +COMPATIBLE_IOCTL(SIOCSIWFRAG) +COMPATIBLE_IOCTL(SIOCGIWFRAG) +COMPATIBLE_IOCTL(SIOCSIWTXPOW) +COMPATIBLE_IOCTL(SIOCGIWTXPOW) +COMPATIBLE_IOCTL(SIOCSIWRETRY) +COMPATIBLE_IOCTL(SIOCGIWRETRY) +COMPATIBLE_IOCTL(SIOCSIWPOWER) +COMPATIBLE_IOCTL(SIOCGIWPOWER) +/* hiddev */ +COMPATIBLE_IOCTL(HIDIOCGVERSION) +COMPATIBLE_IOCTL(HIDIOCAPPLICATION) +COMPATIBLE_IOCTL(HIDIOCGDEVINFO) +COMPATIBLE_IOCTL(HIDIOCGSTRING) +COMPATIBLE_IOCTL(HIDIOCINITREPORT) +COMPATIBLE_IOCTL(HIDIOCGREPORT) +COMPATIBLE_IOCTL(HIDIOCSREPORT) +COMPATIBLE_IOCTL(HIDIOCGREPORTINFO) +COMPATIBLE_IOCTL(HIDIOCGFIELDINFO) +COMPATIBLE_IOCTL(HIDIOCGUSAGE) +COMPATIBLE_IOCTL(HIDIOCSUSAGE) +COMPATIBLE_IOCTL(HIDIOCGUCODE) +COMPATIBLE_IOCTL(HIDIOCGFLAG) +COMPATIBLE_IOCTL(HIDIOCSFLAG) +COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINDEX) 
+COMPATIBLE_IOCTL(HIDIOCGCOLLECTIONINFO) +/* dvb */ +COMPATIBLE_IOCTL(AUDIO_STOP) +COMPATIBLE_IOCTL(AUDIO_PLAY) +COMPATIBLE_IOCTL(AUDIO_PAUSE) +COMPATIBLE_IOCTL(AUDIO_CONTINUE) +COMPATIBLE_IOCTL(AUDIO_SELECT_SOURCE) +COMPATIBLE_IOCTL(AUDIO_SET_MUTE) +COMPATIBLE_IOCTL(AUDIO_SET_AV_SYNC) +COMPATIBLE_IOCTL(AUDIO_SET_BYPASS_MODE) +COMPATIBLE_IOCTL(AUDIO_CHANNEL_SELECT) +COMPATIBLE_IOCTL(AUDIO_GET_STATUS) +COMPATIBLE_IOCTL(AUDIO_GET_CAPABILITIES) +COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER) +COMPATIBLE_IOCTL(AUDIO_SET_ID) +COMPATIBLE_IOCTL(AUDIO_SET_MIXER) +COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE) +COMPATIBLE_IOCTL(AUDIO_SET_EXT_ID) +COMPATIBLE_IOCTL(AUDIO_SET_ATTRIBUTES) +COMPATIBLE_IOCTL(AUDIO_SET_KARAOKE) +COMPATIBLE_IOCTL(DMX_START) +COMPATIBLE_IOCTL(DMX_STOP) +COMPATIBLE_IOCTL(DMX_SET_FILTER) +COMPATIBLE_IOCTL(DMX_SET_PES_FILTER) +COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE) +COMPATIBLE_IOCTL(DMX_GET_PES_PIDS) +COMPATIBLE_IOCTL(DMX_GET_CAPS) +COMPATIBLE_IOCTL(DMX_SET_SOURCE) +COMPATIBLE_IOCTL(DMX_GET_STC) +COMPATIBLE_IOCTL(FE_GET_INFO) +COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD) +COMPATIBLE_IOCTL(FE_DISEQC_SEND_MASTER_CMD) +COMPATIBLE_IOCTL(FE_DISEQC_RECV_SLAVE_REPLY) +COMPATIBLE_IOCTL(FE_DISEQC_SEND_BURST) +COMPATIBLE_IOCTL(FE_SET_TONE) +COMPATIBLE_IOCTL(FE_SET_VOLTAGE) +COMPATIBLE_IOCTL(FE_ENABLE_HIGH_LNB_VOLTAGE) +COMPATIBLE_IOCTL(FE_READ_STATUS) +COMPATIBLE_IOCTL(FE_READ_BER) +COMPATIBLE_IOCTL(FE_READ_SIGNAL_STRENGTH) +COMPATIBLE_IOCTL(FE_READ_SNR) +COMPATIBLE_IOCTL(FE_READ_UNCORRECTED_BLOCKS) +COMPATIBLE_IOCTL(FE_SET_FRONTEND) +COMPATIBLE_IOCTL(FE_GET_FRONTEND) +COMPATIBLE_IOCTL(FE_GET_EVENT) +COMPATIBLE_IOCTL(FE_DISHNETWORK_SEND_LEGACY_CMD) +COMPATIBLE_IOCTL(VIDEO_STOP) +COMPATIBLE_IOCTL(VIDEO_PLAY) +COMPATIBLE_IOCTL(VIDEO_FREEZE) +COMPATIBLE_IOCTL(VIDEO_CONTINUE) +COMPATIBLE_IOCTL(VIDEO_SELECT_SOURCE) +COMPATIBLE_IOCTL(VIDEO_SET_BLANK) +COMPATIBLE_IOCTL(VIDEO_GET_STATUS) +COMPATIBLE_IOCTL(VIDEO_SET_DISPLAY_FORMAT) +COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD) 
+COMPATIBLE_IOCTL(VIDEO_SLOWMOTION) +COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES) +COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER) +COMPATIBLE_IOCTL(VIDEO_SET_ID) +COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE) +COMPATIBLE_IOCTL(VIDEO_SET_FORMAT) +COMPATIBLE_IOCTL(VIDEO_SET_SYSTEM) +COMPATIBLE_IOCTL(VIDEO_SET_HIGHLIGHT) +COMPATIBLE_IOCTL(VIDEO_SET_SPU) +COMPATIBLE_IOCTL(VIDEO_GET_NAVI) +COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES) +COMPATIBLE_IOCTL(VIDEO_GET_SIZE) +COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE) + +/* now things that need handlers */ HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET @@ -2638,4 +3491,159 @@ IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) }; -int ioctl_table_size = ARRAY_SIZE(ioctl_start); +#define IOCTL_HASHSIZE 256 +static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; + +static inline unsigned long ioctl32_hash(unsigned long cmd) +{ + return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; +} + +static void compat_ioctl_error(struct file *filp, unsigned int fd, + unsigned int cmd, unsigned long arg) +{ + char buf[10]; + char *fn = "?"; + char *path; + + /* find the name of the device. 
*/ + path = (char *)__get_free_page(GFP_KERNEL); + if (path) { + fn = d_path(filp->f_path.dentry, filp->f_path.mnt, path, PAGE_SIZE); + if (IS_ERR(fn)) + fn = "?"; + } + + sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK); + if (!isprint(buf[1])) + sprintf(buf, "%02x", buf[1]); + compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) " + "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n", + current->comm, current->pid, + (int)fd, (unsigned int)cmd, buf, + (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK, + (unsigned int)arg, fn); + + if (path) + free_page((unsigned long)path); +} + +asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, + unsigned long arg) +{ + struct file *filp; + int error = -EBADF; + struct ioctl_trans *t; + int fput_needed; + + filp = fget_light(fd, &fput_needed); + if (!filp) + goto out; + + /* RED-PEN how should LSM module know it's handling 32bit? */ + error = security_file_ioctl(filp, cmd, arg); + if (error) + goto out_fput; + + /* + * To allow the compat_ioctl handlers to be self contained + * we need to check the common ioctls here first. + * Just handle them with the standard handlers below. 
+ */ + switch (cmd) { + case FIOCLEX: + case FIONCLEX: + case FIONBIO: + case FIOASYNC: + case FIOQSIZE: + break; + + case FIBMAP: + case FIGETBSZ: + case FIONREAD: + if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + break; + /*FALL THROUGH*/ + + default: + if (filp->f_op && filp->f_op->compat_ioctl) { + error = filp->f_op->compat_ioctl(filp, cmd, arg); + if (error != -ENOIOCTLCMD) + goto out_fput; + } + + if (!filp->f_op || + (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl)) + goto do_ioctl; + break; + } + + for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) { + if (t->cmd == cmd) + goto found_handler; + } + +#ifdef CONFIG_NET + if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) && + cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { + error = siocdevprivate_ioctl(fd, cmd, arg); + } else +#endif + { + static int count; + + if (++count <= 50) + compat_ioctl_error(filp, fd, cmd, arg); + error = -EINVAL; + } + + goto out_fput; + + found_handler: + if (t->handler) { + lock_kernel(); + error = t->handler(fd, cmd, arg, filp); + unlock_kernel(); + goto out_fput; + } + + do_ioctl: + error = vfs_ioctl(filp, fd, cmd, arg); + out_fput: + fput_light(filp, fput_needed); + out: + return error; +} + +static void ioctl32_insert_translation(struct ioctl_trans *trans) +{ + unsigned long hash; + struct ioctl_trans *t; + + hash = ioctl32_hash (trans->cmd); + if (!ioctl32_hash_table[hash]) + ioctl32_hash_table[hash] = trans; + else { + t = ioctl32_hash_table[hash]; + while (t->next) + t = t->next; + trans->next = NULL; + t->next = trans; + } +} + +static int __init init_sys32_ioctl(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) { + if (ioctl_start[i].next != 0) { + printk("ioctl translation %d bad\n",i); + return -1; + } + + ioctl32_insert_translation(&ioctl_start[i]); + } + return 0; +} +__initcall(init_sys32_ioctl); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index d98be5e0132..3527c7c6def 100644 --- a/fs/configfs/file.c +++ 
b/fs/configfs/file.c @@ -77,36 +77,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf return ret; } - -/** - * flush_read_buffer - push buffer to userspace. - * @buffer: data buffer for file. - * @userbuf: user-passed buffer. - * @count: number of bytes requested. - * @ppos: file position. - * - * Copy the buffer we filled in fill_read_buffer() to userspace. - * This is done at the reader's leisure, copying and advancing - * the amount they specify each time. - * This may be called continuously until the buffer is empty. - */ -static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf, - size_t count, loff_t * ppos) -{ - int error; - - if (*ppos > buffer->count) - return 0; - - if (count > (buffer->count - *ppos)) - count = buffer->count - *ppos; - - error = copy_to_user(buf,buffer->page + *ppos,count); - if (!error) - *ppos += count; - return error ? -EFAULT : count; -} - /** * configfs_read_file - read an attribute. * @file: file pointer. 
@@ -139,7 +109,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", __FUNCTION__, count, *ppos, buffer->page); - retval = flush_read_buffer(buffer,buf,count,ppos); + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); out: up(&buffer->sem); return retval; diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 2ec9beac17c..ddc003a9d21 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -32,6 +32,7 @@ #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> +#include <linux/sched.h> #include <linux/configfs.h> #include "configfs_internal.h" diff --git a/fs/dcache.c b/fs/dcache.c index d1bf5d8aeb5..0e73aa0a0e8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -21,7 +21,6 @@ #include <linux/fsnotify.h> #include <linux/slab.h> #include <linux/init.h> -#include <linux/smp_lock.h> #include <linux/hash.h> #include <linux/cache.h> #include <linux/module.h> @@ -121,6 +120,28 @@ static void dentry_iput(struct dentry * dentry) } } +/** + * d_kill - kill dentry and return parent + * @dentry: dentry to kill + * + * Called with dcache_lock and d_lock, releases both. The dentry must + * already be unhashed and removed from the LRU. + * + * If this is the root of the dentry tree, return NULL. + */ +static struct dentry *d_kill(struct dentry *dentry) +{ + struct dentry *parent; + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + /*drops the locks, at that point nobody can reach this dentry */ + dentry_iput(dentry); + parent = dentry->d_parent; + d_free(dentry); + return dentry == parent ? 
NULL : parent; +} + /* * This is dput * @@ -189,28 +210,17 @@ repeat: unhash_it: __d_drop(dentry); - -kill_it: { - struct dentry *parent; - - /* If dentry was on d_lru list - * delete it from there - */ - if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); - dentry_stat.nr_unused--; - } - list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ - /*drops the locks, at that point nobody can reach this dentry */ - dentry_iput(dentry); - parent = dentry->d_parent; - d_free(dentry); - if (dentry == parent) - return; - dentry = parent; - goto repeat; +kill_it: + /* If dentry was on d_lru list + * delete it from there + */ + if (!list_empty(&dentry->d_lru)) { + list_del(&dentry->d_lru); + dentry_stat.nr_unused--; } + dentry = d_kill(dentry); + if (dentry) + goto repeat; } /** @@ -371,22 +381,40 @@ restart: * Throw away a dentry - free the inode, dput the parent. This requires that * the LRU list has already been removed. * + * If prune_parents is true, try to prune ancestors as well. + * * Called with dcache_lock, drops it and then regains. * Called with dentry->d_lock held, drops it. */ -static void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry * dentry, int prune_parents) { - struct dentry * parent; - __d_drop(dentry); - list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ - dentry_iput(dentry); - parent = dentry->d_parent; - d_free(dentry); - if (parent != dentry) - dput(parent); + dentry = d_kill(dentry); + if (!prune_parents) { + dput(dentry); + spin_lock(&dcache_lock); + return; + } + + /* + * Prune ancestors. Locking is simpler than in dput(), + * because dcache_lock needs to be taken anyway. 
+ */ spin_lock(&dcache_lock); + while (dentry) { + if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + return; + + if (dentry->d_op && dentry->d_op->d_delete) + dentry->d_op->d_delete(dentry); + if (!list_empty(&dentry->d_lru)) { + list_del(&dentry->d_lru); + dentry_stat.nr_unused--; + } + __d_drop(dentry); + dentry = d_kill(dentry); + spin_lock(&dcache_lock); + } } /** @@ -394,6 +422,7 @@ static void prune_one_dentry(struct dentry * dentry) * @count: number of entries to try and free * @sb: if given, ignore dentries for other superblocks * which are being unmounted. + * @prune_parents: if true, try to prune ancestors as well in one go * * Shrink the dcache. This is done when we need * more memory, or simply when we need to unmount @@ -404,7 +433,7 @@ static void prune_one_dentry(struct dentry * dentry) * all the dentries are in use. */ -static void prune_dcache(int count, struct super_block *sb) +static void prune_dcache(int count, struct super_block *sb, int prune_parents) { spin_lock(&dcache_lock); for (; count ; count--) { @@ -464,7 +493,7 @@ static void prune_dcache(int count, struct super_block *sb) * without taking the s_umount lock (I already hold it). 
*/ if (sb && dentry->d_sb == sb) { - prune_one_dentry(dentry); + prune_one_dentry(dentry, prune_parents); continue; } /* @@ -479,7 +508,7 @@ static void prune_dcache(int count, struct super_block *sb) s_umount = &dentry->d_sb->s_umount; if (down_read_trylock(s_umount)) { if (dentry->d_sb->s_root != NULL) { - prune_one_dentry(dentry); + prune_one_dentry(dentry, prune_parents); up_read(s_umount); continue; } @@ -550,7 +579,7 @@ repeat: spin_unlock(&dentry->d_lock); continue; } - prune_one_dentry(dentry); + prune_one_dentry(dentry, 1); cond_resched_lock(&dcache_lock); goto repeat; } @@ -829,7 +858,7 @@ void shrink_dcache_parent(struct dentry * parent) int found; while ((found = select_parent(parent)) != 0) - prune_dcache(found, parent->d_sb); + prune_dcache(found, parent->d_sb, 1); } /* @@ -849,7 +878,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; - prune_dcache(nr, NULL); + prune_dcache(nr, NULL, 1); } return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } @@ -1823,6 +1852,16 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, struct vfsmount *rootmnt; struct dentry *root; + /* + * We have various synthetic filesystems that never get mounted. On + * these filesystems dentries are never used for lookup purposes, and + * thus don't need to be hashed. They also don't need a name until a + * user wants to identify the object in /proc/pid/fd/. The little hack + * below allows us to generate a name for these objects on demand: + */ + if (dentry->d_op && dentry->d_op->d_dname) + return dentry->d_op->d_dname(dentry, buf, buflen); + read_lock(&current->fs->lock); rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); @@ -1836,6 +1875,27 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, } /* + * Helper function for dentry_operations.d_dname() members + */ +char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, + const char *fmt, ...) 
+{ + va_list args; + char temp[64]; + int sz; + + va_start(args, fmt); + sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; + va_end(args); + + if (sz > sizeof(temp) || sz > buflen) + return ERR_PTR(-ENAMETOOLONG); + + buffer += buflen - sz; + return memcpy(buffer, temp, sz); +} + +/* * NOTE! The user-level library version returns a * character pointer. The kernel system call just * returns the length of the buffer filled (which diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 643e57b622b..06ef9a255c7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -19,6 +19,7 @@ #include <linux/tty.h> #include <linux/devpts_fs.h> #include <linux/parser.h> +#include <linux/fsnotify.h> #define DEVPTS_SUPER_MAGIC 0x1cd1 @@ -178,8 +179,10 @@ int devpts_pty_new(struct tty_struct *tty) inode->i_private = tty; dentry = get_node(number); - if (!IS_ERR(dentry) && !dentry->d_inode) + if (!IS_ERR(dentry) && !dentry->d_inode) { d_instantiate(dentry, inode); + fsnotify_create(devpts_root->d_inode, dentry); + } mutex_unlock(&devpts_root->d_inode->i_mutex); diff --git a/fs/direct-io.c b/fs/direct-io.c index d9d0833444f..52bb2638f7a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -439,7 +439,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) * Wait on and process all in-flight BIOs. This must only be called once * all bios have been issued so that the refcount can only decrease. * This just waits for all bios to make it through dio_bio_complete. IO - * errors are propogated through dio->io_error and should be propogated via + * errors are propagated through dio->io_error and should be propagated via * dio_complete(). 
*/ static void dio_await_completion(struct dio *dio) @@ -867,7 +867,6 @@ static int do_direct_IO(struct dio *dio) do_holes: /* Handle holes */ if (!buffer_mapped(map_bh)) { - char *kaddr; loff_t i_size_aligned; /* AKPM: eargh, -ENOTBLK is a hack */ @@ -888,11 +887,8 @@ do_holes: page_cache_release(page); goto out; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + (block_in_page << blkbits), - 0, 1 << blkbits); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_in_page << blkbits, + 1 << blkbits, KM_USER0); dio->block_in_file++; block_in_page++; goto next_block; @@ -1110,7 +1106,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, spin_lock_irqsave(&dio->bio_lock, flags); ret2 = --dio->refcount; spin_unlock_irqrestore(&dio->bio_lock, flags); - BUG_ON(!dio->is_async && ret2 != 0); + if (ret2 == 0) { ret = dio_complete(dio, offset, ret); kfree(dio); diff --git a/fs/dquot.c b/fs/dquot.c index 0a5febc159f..8819d281500 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -69,7 +69,6 @@ #include <linux/file.h> #include <linux/slab.h> #include <linux/sysctl.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -475,7 +474,7 @@ int vfs_quota_sync(struct super_block *sb, int type) spin_lock(&dq_list_lock); dirty = &dqopt->info[cnt].dqi_dirty_list; while (!list_empty(dirty)) { - dquot = list_entry(dirty->next, struct dquot, dq_dirty); + dquot = list_first_entry(dirty, struct dquot, dq_dirty); /* Dirty and inactive can be only bad dquot... */ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { clear_dquot_dirty(dquot); @@ -721,7 +720,8 @@ static inline int dqput_blocks(struct dquot *dquot) /* Remove references to dquots from inode - add dquot to list for freeing if needed */ /* We can't race with anybody because we hold dqptr_sem for writing... 
*/ -int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) +static int remove_inode_dquot_ref(struct inode *inode, int type, + struct list_head *tofree_head) { struct dquot *dquot = inode->i_dquot[type]; @@ -1421,7 +1421,7 @@ int vfs_quota_off(struct super_block *sb, int type) /* If quota was reenabled in the meantime, we have * nothing to do */ if (!sb_has_quota_enabled(sb, cnt)) { - mutex_lock(&toputinode[cnt]->i_mutex); + mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); truncate_inode_pages(&toputinode[cnt]->i_data, 0); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 403e3bad145..1b9dd9a96f1 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -580,5 +580,7 @@ void ecryptfs_write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); +int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, + int num_zeros); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 7a7d25d541e..59288d81707 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -28,69 +28,11 @@ #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/security.h> -#include <linux/smp_lock.h> #include <linux/compat.h> #include <linux/fs_stack.h> #include "ecryptfs_kernel.h" /** - * ecryptfs_llseek - * @file: File we are seeking in - * @offset: The offset to seek to - * @origin: 2 - offset from i_size; 1 - offset from f_pos - * - * Returns the position we have seeked to, or negative on error - */ -static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin) -{ - loff_t rv; - loff_t new_end_pos; - int rc; - int expanding_file = 0; - struct inode *inode = file->f_mapping->host; - - /* If our offset is past the end of our file, we're going to - * need to grow it so we have a valid length of 0's */ - new_end_pos = 
offset; - switch (origin) { - case 2: - new_end_pos += i_size_read(inode); - expanding_file = 1; - break; - case 1: - new_end_pos += file->f_pos; - if (new_end_pos > i_size_read(inode)) { - ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " - "> i_size_read(inode)(=[0x%.16x])\n", - new_end_pos, i_size_read(inode)); - expanding_file = 1; - } - break; - default: - if (new_end_pos > i_size_read(inode)) { - ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " - "> i_size_read(inode)(=[0x%.16x])\n", - new_end_pos, i_size_read(inode)); - expanding_file = 1; - } - } - ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos); - if (expanding_file) { - rc = ecryptfs_truncate(file->f_path.dentry, new_end_pos); - if (rc) { - rv = rc; - ecryptfs_printk(KERN_ERR, "Error on attempt to " - "truncate to (higher) offset [0x%.16x];" - " rc = [%d]\n", new_end_pos, rc); - goto out; - } - } - rv = generic_file_llseek(file, offset, origin); -out: - return rv; -} - -/** * ecryptfs_read_update_atime * * generic_file_read updates the atime of upper layer inode. 
But, it @@ -426,7 +368,7 @@ const struct file_operations ecryptfs_dir_fops = { }; const struct file_operations ecryptfs_main_fops = { - .llseek = ecryptfs_llseek, + .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = ecryptfs_read_update_atime, .write = do_sync_write, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1548be26b5e..83e94fedd4e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -800,6 +800,25 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) goto out_fput; } } else { /* new_length < i_size_read(inode) */ + pgoff_t index = 0; + int end_pos_in_page = -1; + + if (new_length != 0) { + index = ((new_length - 1) >> PAGE_CACHE_SHIFT); + end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); + } + if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) { + if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file, + index, + (end_pos_in_page + 1), + ((PAGE_CACHE_SIZE - 1) + - end_pos_in_page)))) { + printk(KERN_ERR "Error attempting to zero out " + "the remainder of the end page on " + "reducing truncate; rc = [%d]\n", rc); + goto out_fput; + } + } vmtruncate(inode, new_length); rc = ecryptfs_write_inode_size_to_metadata( lower_file, lower_dentry->d_inode, inode, dentry, @@ -875,9 +894,54 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) struct ecryptfs_crypt_stat *crypt_stat; crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; - lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + ecryptfs_init_crypt_stat(crypt_stat); inode = dentry->d_inode; lower_inode = ecryptfs_inode_to_lower(inode); + lower_dentry = ecryptfs_dentry_to_lower(dentry); + mutex_lock(&crypt_stat->cs_mutex); + if (S_ISDIR(dentry->d_inode->i_mode)) + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) + || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { + struct vfsmount *lower_mnt; + struct file *lower_file = NULL; 
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + int lower_flags; + + lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_flags = O_RDONLY; + if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, + lower_mnt, lower_flags))) { + printk(KERN_ERR + "Error opening lower file; rc = [%d]\n", rc); + mutex_unlock(&crypt_stat->cs_mutex); + goto out; + } + mount_crypt_stat = &ecryptfs_superblock_to_private( + dentry->d_sb)->mount_crypt_stat; + if ((rc = ecryptfs_read_metadata(dentry, lower_file))) { + if (!(mount_crypt_stat->flags + & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { + rc = -EIO; + printk(KERN_WARNING "Attempt to read file that " + "is not in a valid eCryptfs format, " + "and plaintext passthrough mode is not " + "enabled; returning -EIO\n"); + + mutex_unlock(&crypt_stat->cs_mutex); + fput(lower_file); + goto out; + } + rc = 0; + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); + fput(lower_file); + goto out; + } + fput(lower_file); + } + mutex_unlock(&crypt_stat->cs_mutex); if (ia->ia_valid & ATTR_SIZE) { ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n", diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 8cbf3f69ebe..606128f5c92 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -583,8 +583,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static struct ecryptfs_cache_info { diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 3baf253be95..a9d87c47f72 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -19,7 +19,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. 
*/ - +#include <linux/sched.h> #include "ecryptfs_kernel.h" static LIST_HEAD(ecryptfs_msg_ctx_free_list); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 0770c4b66f5..7d5a43cb0d5 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -56,9 +56,6 @@ static struct page *ecryptfs_get1page(struct file *file, int index) return read_mapping_page(mapping, index, (void *)file); } -static -int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); - /** * ecryptfs_fill_zeros * @file: The ecryptfs file @@ -101,10 +98,13 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) if (old_end_page_index == new_end_page_index) { /* Start and end are in the same page; we just need to * set a portion of the existing page to zero's */ - rc = write_zeros(file, index, (old_end_pos_in_page + 1), - (new_end_pos_in_page - old_end_pos_in_page)); + rc = ecryptfs_write_zeros(file, index, + (old_end_pos_in_page + 1), + (new_end_pos_in_page + - old_end_pos_in_page)); if (rc) - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" + "file=[%p], " "index=[0x%.16x], " "old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - new_end_pos_in_page" @@ -117,10 +117,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) goto out; } /* Fill the remainder of the previous last page with zeros */ - rc = write_zeros(file, index, (old_end_pos_in_page + 1), + rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1), ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], " "index=[0x%.16x], old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " "returned [%d]\n", file, index, @@ -131,9 +131,10 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) index++; while (index < new_end_page_index) { /* Fill all intermediate pages with zeros */ - rc = write_zeros(file, 
index, 0, PAGE_CACHE_SIZE); + rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" + "file=[%p], " "index=[0x%.16x], " "old_end_pos_in_page=[d], " "(PAGE_CACHE_SIZE - new_end_pos_in_page" @@ -149,9 +150,9 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length) } /* Fill the portion at the beginning of the last new page with * zero's */ - rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); + rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1)); if (rc) { - ecryptfs_printk(KERN_ERR, "write_zeros(file=" + ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=" "[%p], index=[0x%.16x], 0, " "new_end_pos_in_page=[%d]" "returned [%d]\n", file, index, @@ -364,22 +365,39 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) { struct inode *inode = page->mapping->host; int end_byte_in_page; - char *page_virt; if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index) goto out; end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; if (to > end_byte_in_page) end_byte_in_page = to; - page_virt = kmap_atomic(page, KM_USER0); - memset((page_virt + end_byte_in_page), 0, - (PAGE_CACHE_SIZE - end_byte_in_page)); - kunmap_atomic(page_virt, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, end_byte_in_page, + PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0); out: return 0; } +/** + * eCryptfs does not currently support holes. When writing after a + * seek past the end of the file, eCryptfs fills in 0's through to the + * current location. The code to fill in the 0's to all the + * intermediate pages calls ecryptfs_prepare_write_no_truncate(). + */ +static int +ecryptfs_prepare_write_no_truncate(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + int rc = 0; + + if (from == 0 && to == PAGE_CACHE_SIZE) + goto out; /* If we are writing a full page, it will be + up to date. 
*/ + if (!PageUptodate(page)) + rc = ecryptfs_do_readpage(file, page, page->index); +out: + return rc; +} + static int ecryptfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -390,6 +408,23 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, up to date. */ if (!PageUptodate(page)) rc = ecryptfs_do_readpage(file, page, page->index); + if (page->index != 0) { + loff_t end_of_prev_pg_pos = + (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1); + + if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) { + rc = ecryptfs_truncate(file->f_path.dentry, + end_of_prev_pg_pos); + if (rc) { + printk(KERN_ERR "Error on attempt to " + "truncate to (higher) offset [%lld];" + " rc = [%d]\n", end_of_prev_pg_pos, rc); + goto out; + } + } + if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host)) + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + } out: return rc; } @@ -725,7 +760,7 @@ out: } /** - * write_zeros + * ecryptfs_write_zeros * @file: The ecryptfs file * @index: The index in which we are writing * @start: The position after the last block of data @@ -735,12 +770,11 @@ out: * * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE */ -static -int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) +int +ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) { int rc = 0; struct page *tmp_page; - char *tmp_page_virt; tmp_page = ecryptfs_get1page(file, index); if (IS_ERR(tmp_page)) { @@ -749,18 +783,15 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) rc = PTR_ERR(tmp_page); goto out; } - rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros); - if (rc) { + if ((rc = ecryptfs_prepare_write_no_truncate(file, tmp_page, start, + (start + num_zeros)))) { ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " - "to remainder of page at index [0x%.16x]\n", + "to page at index [0x%.16x]\n", index); 
page_cache_release(tmp_page); goto out; } - tmp_page_virt = kmap_atomic(tmp_page, KM_USER0); - memset(((char *)tmp_page_virt + start), 0, num_zeros); - kunmap_atomic(tmp_page_virt, KM_USER0); - flush_dcache_page(tmp_page); + zero_user_page(tmp_page, start, num_zeros, KM_USER0); rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " diff --git a/fs/efs/super.c b/fs/efs/super.c index ba7a8b9da0c..e0a6839e68a 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -72,8 +72,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct efs_inode_info *ei = (struct efs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/eventfd.c b/fs/eventfd.c new file mode 100644 index 00000000000..2ce19c000d2 --- /dev/null +++ b/fs/eventfd.c @@ -0,0 +1,226 @@ +/* + * fs/eventfd.c + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/anon_inodes.h> +#include <linux/eventfd.h> + +struct eventfd_ctx { + wait_queue_head_t wqh; + /* + * Every time that a write(2) is performed on an eventfd, the + * value of the __u64 being written is added to "count" and a + * wakeup is performed on "wqh". A read(2) will return the "count" + * value to userspace, and will reset "count" to zero. The kernel + * side eventfd_signal() also adds to the "count" counter and + * issues a wakeup. + */ + __u64 count; +}; + +/* + * Adds "n" to the eventfd counter "count". Returns "n" in case of + * success, or a value lower than "n" in case of counter overflow. 
+ * This function is supposed to be called by the kernel in paths + * that do not allow sleeping. In this function we allow the counter + * to reach the ULLONG_MAX value, and we signal this as overflow + * condition by returning a POLLERR to poll(2). + */ +int eventfd_signal(struct file *file, int n) +{ + struct eventfd_ctx *ctx = file->private_data; + unsigned long flags; + + if (n < 0) + return -EINVAL; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ULLONG_MAX - ctx->count < n) + n = (int) (ULLONG_MAX - ctx->count); + ctx->count += n; + if (waitqueue_active(&ctx->wqh)) + wake_up_locked(&ctx->wqh); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return n; +} + +static int eventfd_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static unsigned int eventfd_poll(struct file *file, poll_table *wait) +{ + struct eventfd_ctx *ctx = file->private_data; + unsigned int events = 0; + unsigned long flags; + + poll_wait(file, &ctx->wqh, wait); + + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->count > 0) + events |= POLLIN; + if (ctx->count == ULLONG_MAX) + events |= POLLERR; + if (ULLONG_MAX - 1 > ctx->count) + events |= POLLOUT; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return events; +} + +static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 ucnt; + DECLARE_WAITQUEUE(wait, current); + + if (count < sizeof(ucnt)) + return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); + res = -EAGAIN; + ucnt = ctx->count; + if (ucnt > 0) + res = sizeof(ucnt); + else if (!(file->f_flags & O_NONBLOCK)) { + __add_wait_queue(&ctx->wqh, &wait); + for (res = 0;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (ctx->count > 0) { + ucnt = ctx->count; + res = sizeof(ucnt); + break; + } + if (signal_pending(current)) { + res = -ERESTARTSYS; + break; + } + spin_unlock_irq(&ctx->wqh.lock); + schedule(); + 
spin_lock_irq(&ctx->wqh.lock); + } + __remove_wait_queue(&ctx->wqh, &wait); + __set_current_state(TASK_RUNNING); + } + if (res > 0) { + ctx->count = 0; + if (waitqueue_active(&ctx->wqh)) + wake_up_locked(&ctx->wqh); + } + spin_unlock_irq(&ctx->wqh.lock); + if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) + return -EFAULT; + + return res; +} + +static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 ucnt; + DECLARE_WAITQUEUE(wait, current); + + if (count < sizeof(ucnt)) + return -EINVAL; + if (copy_from_user(&ucnt, buf, sizeof(ucnt))) + return -EFAULT; + if (ucnt == ULLONG_MAX) + return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); + res = -EAGAIN; + if (ULLONG_MAX - ctx->count > ucnt) + res = sizeof(ucnt); + else if (!(file->f_flags & O_NONBLOCK)) { + __add_wait_queue(&ctx->wqh, &wait); + for (res = 0;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (ULLONG_MAX - ctx->count > ucnt) { + res = sizeof(ucnt); + break; + } + if (signal_pending(current)) { + res = -ERESTARTSYS; + break; + } + spin_unlock_irq(&ctx->wqh.lock); + schedule(); + spin_lock_irq(&ctx->wqh.lock); + } + __remove_wait_queue(&ctx->wqh, &wait); + __set_current_state(TASK_RUNNING); + } + if (res > 0) { + ctx->count += ucnt; + if (waitqueue_active(&ctx->wqh)) + wake_up_locked(&ctx->wqh); + } + spin_unlock_irq(&ctx->wqh.lock); + + return res; +} + +static const struct file_operations eventfd_fops = { + .release = eventfd_release, + .poll = eventfd_poll, + .read = eventfd_read, + .write = eventfd_write, +}; + +struct file *eventfd_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + if (file->f_op != &eventfd_fops) { + fput(file); + return ERR_PTR(-EINVAL); + } + + return file; +} + +asmlinkage long sys_eventfd(unsigned int count) +{ + int error, fd; + struct eventfd_ctx *ctx; + struct file *file; + struct inode *inode; + + ctx = 
kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + init_waitqueue_head(&ctx->wqh); + ctx->count = count; + + /* + * When we call this, the initialization must be complete, since + * anon_inode_getfd() will install the fd. + */ + error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", + &eventfd_fops, ctx); + if (!error) + return fd; + + kfree(ctx); + return error; +} + diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3ae644e7e86..0b73cd45a06 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,6 +1,6 @@ /* - * fs/eventpoll.c ( Efficent event polling implementation ) - * Copyright (C) 2001,...,2006 Davide Libenzi + * fs/eventpoll.c (Efficent event polling implementation) + * Copyright (C) 2001,...,2007 Davide Libenzi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,7 +11,6 @@ * */ -#include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -22,34 +21,31 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/poll.h> -#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/spinlock.h> #include <linux/syscalls.h> -#include <linux/rwsem.h> #include <linux/rbtree.h> #include <linux/wait.h> #include <linux/eventpoll.h> #include <linux/mount.h> #include <linux/bitops.h> #include <linux/mutex.h> +#include <linux/anon_inodes.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/io.h> #include <asm/mman.h> #include <asm/atomic.h> -#include <asm/semaphore.h> - /* * LOCKING: * There are three level of locking required by epoll : * * 1) epmutex (mutex) - * 2) ep->sem (rw_semaphore) - * 3) ep->lock (rw_lock) + * 2) ep->mtx (mutex) + * 3) ep->lock (spinlock) * * The acquire order is the one listed above, from 1 to 3. * We need a spinlock (ep->lock) because we manipulate objects @@ -59,25 +55,22 @@ * a spinlock. 
During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a - * read-write semaphore (ep->sem). It is acquired on read during - * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) - * and during eventpoll_release_file(). Then we also need a global - * semaphore to serialize eventpoll_release_file() and ep_free(). - * This semaphore is acquired by ep_free() during the epoll file + * mutex (ep->mtx). It is acquired during the event transfer loop, + * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). + * Then we also need a global mutex to serialize eventpoll_release_file() + * and ep_free(). + * This mutex is acquired by ep_free() during the epoll file * cleanup path and it is also acquired by eventpoll_release_file() * if a file has been pushed inside an epoll set and it is then * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). - * It is possible to drop the "ep->sem" and to use the global - * semaphore "epmutex" (together with "ep->lock") to have it working, - * but having "ep->sem" will make the interface more scalable. + * It is possible to drop the "ep->mtx" and to use the global + * mutex "epmutex" (together with "ep->lock") to have it working, + * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epmutex" are very rare, while for - * normal operations the epoll private "ep->sem" will guarantee - * a greater scalability. + * normal operations the epoll private "ep->mtx" will guarantee + * a better scalability. 
*/ - -#define EVENTPOLLFS_MAGIC 0x03111965 /* My birthday should work for this :) */ - #define DEBUG_EPOLL 0 #if DEBUG_EPOLL > 0 @@ -107,6 +100,7 @@ #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) +#define EP_UNACTIVE_PTR ((void *) -1L) struct epoll_filefd { struct file *file; @@ -117,7 +111,7 @@ struct epoll_filefd { * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". * It is used to keep track on all tasks that are currently inside the wake_up() code * to 1) short-circuit the one coming from the same task and same wait queue head - * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting + * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting * 3) let go the ones coming from other tasks. */ struct wake_task_node { @@ -136,21 +130,57 @@ struct poll_safewake { }; /* + * Each file descriptor added to the eventpoll interface will + * have an entry of this type linked to the "rbr" RB tree. + */ +struct epitem { + /* RB tree node used to link this structure to the eventpoll RB tree */ + struct rb_node rbn; + + /* List header used to link this structure to the eventpoll ready list */ + struct list_head rdllink; + + /* + * Works together "struct eventpoll"->ovflist in keeping the + * single linked chain of items. 
+ */ + struct epitem *next; + + /* The file descriptor information this item refers to */ + struct epoll_filefd ffd; + + /* Number of active wait queue attached to poll operations */ + int nwait; + + /* List containing poll wait queues */ + struct list_head pwqlist; + + /* The "container" of this item */ + struct eventpoll *ep; + + /* List header used to link this item to the "struct file" items list */ + struct list_head fllink; + + /* The structure that describe the interested events and the source fd */ + struct epoll_event event; +}; + +/* * This structure is stored inside the "private_data" member of the file * structure and rapresent the main data sructure for the eventpoll * interface. */ struct eventpoll { /* Protect the this structure access */ - rwlock_t lock; + spinlock_t lock; /* - * This semaphore is used to ensure that files are not removed - * while epoll is using them. This is read-held during the event - * collection loop and it is write-held during the file cleanup - * path, the epoll file exit code and the ctl operations. + * This mutex is used to ensure that files are not removed + * while epoll is using them. This is held during the event + * collection loop, the file cleanup path, the epoll file exit + * code and the ctl operations. */ - struct rw_semaphore sem; + struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; @@ -161,8 +191,15 @@ struct eventpoll { /* List of ready file descriptors */ struct list_head rdllist; - /* RB-Tree root used to store monitored fd structs */ + /* RB tree root used to store monitored fd structs */ struct rb_root rbr; + + /* + * This is a single linked list that chains all the "struct epitem" that + * happened while transfering ready events to userspace w/out + * holding ->lock. 
+ */ + struct epitem *ovflist; }; /* Wait structure used by the poll hooks */ @@ -183,99 +220,14 @@ struct eppoll_entry { wait_queue_head_t *whead; }; -/* - * Each file descriptor added to the eventpoll interface will - * have an entry of this type linked to the hash. - */ -struct epitem { - /* RB-Tree node used to link this structure to the eventpoll rb-tree */ - struct rb_node rbn; - - /* List header used to link this structure to the eventpoll ready list */ - struct list_head rdllink; - - /* The file descriptor information this item refers to */ - struct epoll_filefd ffd; - - /* Number of active wait queue attached to poll operations */ - int nwait; - - /* List containing poll wait queues */ - struct list_head pwqlist; - - /* The "container" of this item */ - struct eventpoll *ep; - - /* The structure that describe the interested events and the source fd */ - struct epoll_event event; - - /* - * Used to keep track of the usage count of the structure. This avoids - * that the structure will desappear from underneath our processing. - */ - atomic_t usecnt; - - /* List header used to link this item to the "struct file" items list */ - struct list_head fllink; - - /* List header used to link the item to the transfer list */ - struct list_head txlink; - - /* - * This is used during the collection/transfer of events to userspace - * to pin items empty events set. 
- */ - unsigned int revents; -}; - /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; struct epitem *epi; }; - - -static void ep_poll_safewake_init(struct poll_safewake *psw); -static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); -static int ep_getfd(int *efd, struct inode **einode, struct file **efile, - struct eventpoll *ep); -static int ep_alloc(struct eventpoll **pep); -static void ep_free(struct eventpoll *ep); -static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); -static void ep_use_epitem(struct epitem *epi); -static void ep_release_epitem(struct epitem *epi); -static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt); -static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi); -static int ep_insert(struct eventpoll *ep, struct epoll_event *event, - struct file *tfile, int fd); -static int ep_modify(struct eventpoll *ep, struct epitem *epi, - struct epoll_event *event); -static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi); -static int ep_unlink(struct eventpoll *ep, struct epitem *epi); -static int ep_remove(struct eventpoll *ep, struct epitem *epi); -static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key); -static int ep_eventpoll_close(struct inode *inode, struct file *file); -static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait); -static int ep_collect_ready_items(struct eventpoll *ep, - struct list_head *txlist, int maxevents); -static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, - struct epoll_event __user *events); -static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist); -static int ep_events_transfer(struct eventpoll *ep, - struct epoll_event __user *events, - int maxevents); -static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout); -static int 
eventpollfs_delete_dentry(struct dentry *dentry); -static struct inode *ep_eventpoll_inode(void); -static int eventpollfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt); - /* - * This semaphore is used to serialize ep_free() and eventpoll_release_file(). + * This mutex is used to serialize ep_free() and eventpoll_release_file(). */ static struct mutex epmutex; @@ -288,39 +240,8 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; -/* Virtual fs used to allocate inodes for eventpoll files */ -static struct vfsmount *eventpoll_mnt __read_mostly; - -/* File callbacks that implement the eventpoll file behaviour */ -static const struct file_operations eventpoll_fops = { - .release = ep_eventpoll_close, - .poll = ep_eventpoll_poll -}; - -/* - * This is used to register the virtual file system from where - * eventpoll inodes are allocated. 
- */ -static struct file_system_type eventpoll_fs_type = { - .name = "eventpollfs", - .get_sb = eventpollfs_get_sb, - .kill_sb = kill_anon_super, -}; - -/* Very basic directory entry operations for the eventpoll virtual file system */ -static struct dentry_operations eventpollfs_dentry_operations = { - .d_delete = eventpollfs_delete_dentry, -}; - - -/* Fast test to see if the file is an evenpoll file */ -static inline int is_file_epoll(struct file *f) -{ - return f->f_op == &eventpoll_fops; -} - -/* Setup the structure that is used as key for the rb-tree */ +/* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { @@ -328,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd, ffd->fd = fd; } -/* Compare rb-tree keys */ +/* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { @@ -336,36 +257,25 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Special initialization for the rb-tree node to detect linkage */ +/* Special initialization for the RB tree node to detect linkage */ static inline void ep_rb_initnode(struct rb_node *n) { rb_set_parent(n, n); } -/* Removes a node from the rb-tree and marks it for a fast is-linked check */ +/* Removes a node from the RB tree and marks it for a fast is-linked check */ static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) { rb_erase(n, r); rb_set_parent(n, n); } -/* Fast check to verify that the item is linked to the main rb-tree */ +/* Fast check to verify that the item is linked to the main RB tree */ static inline int ep_rb_linked(struct rb_node *n) { return rb_parent(n) != n; } -/* - * Remove the item from the list and perform its initialization. - * This is useful for us because we can test if the item is linked - * using "ep_is_linked(p)". 
- */ -static inline void ep_list_del(struct list_head *p) -{ - list_del(p); - INIT_LIST_HEAD(p); -} - /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct list_head *p) { @@ -385,7 +295,7 @@ static inline struct epitem * ep_item_from_epqueue(poll_table *p) } /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ -static inline int ep_op_hash_event(int op) +static inline int ep_op_has_event(int op) { return op != EPOLL_CTL_DEL; } @@ -398,7 +308,6 @@ static void ep_poll_safewake_init(struct poll_safewake *psw) spin_lock_init(&psw->lock); } - /* * Perform a safe wake up of the poll wait list. The problem is that * with the new callback'd wake up system, it is possible that the @@ -453,378 +362,195 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) spin_unlock_irqrestore(&psw->lock, flags); } - /* - * This is called from eventpoll_release() to unlink files from the eventpoll - * interface. We need to have this facility to cleanup correctly files that are - * closed without being removed from the eventpoll interface. + * This function unregister poll callbacks from the associated file descriptor. + * Since this must be called without holding "ep->lock" the atomic exchange trick + * will protect us from multiple unregister. */ -void eventpoll_release_file(struct file *file) +static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { - struct list_head *lsthead = &file->f_ep_links; - struct eventpoll *ep; - struct epitem *epi; + int nwait; + struct list_head *lsthead = &epi->pwqlist; + struct eppoll_entry *pwq; - /* - * We don't want to get "file->f_ep_lock" because it is not - * necessary. It is not necessary because we're in the "struct file" - * cleanup path, and this means that noone is using this file anymore. - * The only hit might come from ep_free() but by holding the semaphore - * will correctly serialize the operation. 
We do need to acquire - * "ep->sem" after "epmutex" because ep_remove() requires it when called - * from anywhere but ep_free(). - */ - mutex_lock(&epmutex); + /* This is called without locks, so we need the atomic exchange */ + nwait = xchg(&epi->nwait, 0); - while (!list_empty(lsthead)) { - epi = list_entry(lsthead->next, struct epitem, fllink); + if (nwait) { + while (!list_empty(lsthead)) { + pwq = list_first_entry(lsthead, struct eppoll_entry, llink); - ep = epi->ep; - ep_list_del(&epi->fllink); - down_write(&ep->sem); - ep_remove(ep, epi); - up_write(&ep->sem); + list_del_init(&pwq->llink); + remove_wait_queue(pwq->whead, &pwq->wait); + kmem_cache_free(pwq_cache, pwq); + } } - - mutex_unlock(&epmutex); } - /* - * It opens an eventpoll file descriptor by suggesting a storage of "size" - * file descriptors. The size parameter is just an hint about how to size - * data structures. It won't prevent the user to store more than "size" - * file descriptors inside the epoll interface. It is the kernel part of - * the userspace epoll_create(2). + * Removes a "struct epitem" from the eventpoll RB tree and deallocates + * all the associated resources. Must be called with "mtx" held. */ -asmlinkage long sys_epoll_create(int size) +static int ep_remove(struct eventpoll *ep, struct epitem *epi) { - int error, fd = -1; - struct eventpoll *ep; - struct inode *inode; - struct file *file; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", - current, size)); + unsigned long flags; + struct file *file = epi->ffd.file; /* - * Sanity check on the size parameter, and create the internal data - * structure ( "struct eventpoll" ). + * Removes poll wait queue hooks. We _have_ to do this without holding + * the "ep->lock" otherwise a deadlock might occur. This because of the + * sequence of the lock acquisition. Here we do "ep->lock" then the wait + * queue head lock when unregistering the wait queue. 
The wakeup callback + * will run by holding the wait queue head lock and will call our callback + * that will try to get "ep->lock". */ - error = -EINVAL; - if (size <= 0 || (error = ep_alloc(&ep)) != 0) - goto eexit_1; + ep_unregister_pollwait(ep, epi); - /* - * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. - */ - error = ep_getfd(&fd, &inode, &file, ep); - if (error) - goto eexit_2; + /* Remove the current item from the list of epoll hooks */ + spin_lock(&file->f_ep_lock); + if (ep_is_linked(&epi->fllink)) + list_del_init(&epi->fllink); + spin_unlock(&file->f_ep_lock); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, fd)); + if (ep_rb_linked(&epi->rbn)) + ep_rb_erase(&epi->rbn, &ep->rbr); - return fd; + spin_lock_irqsave(&ep->lock, flags); + if (ep_is_linked(&epi->rdllink)) + list_del_init(&epi->rdllink); + spin_unlock_irqrestore(&ep->lock, flags); -eexit_2: - ep_free(ep); - kfree(ep); -eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, error)); - return error; -} + /* At this point it is safe to free the eventpoll item */ + kmem_cache_free(epi_cache, epi); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", + current, ep, file)); -/* - * The following function implements the controller interface for - * the eventpoll file that enables the insertion/removal/change of - * file descriptors inside the interest set. It represents - * the kernel part of the user space epoll_ctl(2). 
- */ -asmlinkage long -sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) + return 0; +} + +static void ep_free(struct eventpoll *ep) { - int error; - struct file *file, *tfile; - struct eventpoll *ep; + struct rb_node *rbp; struct epitem *epi; - struct epoll_event epds; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", - current, epfd, op, fd, event)); - - error = -EFAULT; - if (ep_op_hash_event(op) && - copy_from_user(&epds, event, sizeof(struct epoll_event))) - goto eexit_1; - - /* Get the "struct file *" for the eventpoll file */ - error = -EBADF; - file = fget(epfd); - if (!file) - goto eexit_1; - - /* Get the "struct file *" for the target file */ - tfile = fget(fd); - if (!tfile) - goto eexit_2; - - /* The target file descriptor must support poll */ - error = -EPERM; - if (!tfile->f_op || !tfile->f_op->poll) - goto eexit_3; + /* We need to release all tasks waiting for these file */ + if (waitqueue_active(&ep->poll_wait)) + ep_poll_safewake(&psw, &ep->poll_wait); /* - * We have to check that the file structure underneath the file descriptor - * the user passed to us _is_ an eventpoll file. And also we do not permit - * adding an epoll file descriptor inside itself. + * We need to lock this because we could be hit by + * eventpoll_release_file() while we're freeing the "struct eventpoll". + * We do not need to hold "ep->mtx" here because the epoll file + * is on the way to be removed and no one has references to it + * anymore. The only hit might come from eventpoll_release_file() but + * holding "epmutex" is sufficent here. */ - error = -EINVAL; - if (file == tfile || !is_file_epoll(file)) - goto eexit_3; + mutex_lock(&epmutex); /* - * At this point it is safe to assume that the "private_data" contains - * our own data structure. + * Walks through the whole tree by unregistering poll callbacks. 
*/ - ep = file->private_data; - - down_write(&ep->sem); - - /* Try to lookup the file inside our hash table */ - epi = ep_find(ep, tfile, fd); - - error = -EINVAL; - switch (op) { - case EPOLL_CTL_ADD: - if (!epi) { - epds.events |= POLLERR | POLLHUP; + for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + epi = rb_entry(rbp, struct epitem, rbn); - error = ep_insert(ep, &epds, tfile, fd); - } else - error = -EEXIST; - break; - case EPOLL_CTL_DEL: - if (epi) - error = ep_remove(ep, epi); - else - error = -ENOENT; - break; - case EPOLL_CTL_MOD: - if (epi) { - epds.events |= POLLERR | POLLHUP; - error = ep_modify(ep, epi, &epds); - } else - error = -ENOENT; - break; + ep_unregister_pollwait(ep, epi); } /* - * The function ep_find() increments the usage count of the structure - * so, if this is not NULL, we need to release it. + * Walks through the whole tree by freeing each "struct epitem". At this + * point we are sure no poll callbacks will be lingering around, and also by + * holding "epmutex" we can be sure that no file cleanup code will hit + * us during this operation. So we can avoid the lock on "ep->lock". */ - if (epi) - ep_release_epitem(epi); - - up_write(&ep->sem); - -eexit_3: - fput(tfile); -eexit_2: - fput(file); -eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", - current, epfd, op, fd, event, error)); + while ((rbp = rb_first(&ep->rbr)) != 0) { + epi = rb_entry(rbp, struct epitem, rbn); + ep_remove(ep, epi); + } - return error; + mutex_unlock(&epmutex); + mutex_destroy(&ep->mtx); + kfree(ep); } - -/* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_wait(2). 
- */ -asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) +static int ep_eventpoll_release(struct inode *inode, struct file *file) { - int error; - struct file *file; - struct eventpoll *ep; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", - current, epfd, events, maxevents, timeout)); - - /* The maximum number of event must be greater than zero */ - if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) - return -EINVAL; - - /* Verify that the area passed by the user is writeable */ - if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { - error = -EFAULT; - goto eexit_1; - } + struct eventpoll *ep = file->private_data; - /* Get the "struct file *" for the eventpoll file */ - error = -EBADF; - file = fget(epfd); - if (!file) - goto eexit_1; + if (ep) + ep_free(ep); - /* - * We have to check that the file structure underneath the fd - * the user passed to us _is_ an eventpoll file. - */ - error = -EINVAL; - if (!is_file_epoll(file)) - goto eexit_2; + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); + return 0; +} - /* - * At this point it is safe to assume that the "private_data" contains - * our own data structure. - */ - ep = file->private_data; +static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) +{ + unsigned int pollflags = 0; + unsigned long flags; + struct eventpoll *ep = file->private_data; - /* Time to fish for events ... 
*/ - error = ep_poll(ep, events, maxevents, timeout); + /* Insert inside our poll wait queue */ + poll_wait(file, &ep->poll_wait, wait); -eexit_2: - fput(file); -eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", - current, epfd, events, maxevents, timeout, error)); + /* Check our condition */ + spin_lock_irqsave(&ep->lock, flags); + if (!list_empty(&ep->rdllist)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irqrestore(&ep->lock, flags); - return error; + return pollflags; } +/* File callbacks that implement the eventpoll file behaviour */ +static const struct file_operations eventpoll_fops = { + .release = ep_eventpoll_release, + .poll = ep_eventpoll_poll +}; -#ifdef TIF_RESTORE_SIGMASK +/* Fast test to see if the file is an evenpoll file */ +static inline int is_file_epoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} /* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_pwait(2). + * This is called from eventpoll_release() to unlink files from the eventpoll + * interface. We need to have this facility to cleanup correctly files that are + * closed without being removed from the eventpoll interface. */ -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, const sigset_t __user *sigmask, - size_t sigsetsize) +void eventpoll_release_file(struct file *file) { - int error; - sigset_t ksigmask, sigsaved; - - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. 
- */ - if (sigmask) { - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - error = sys_epoll_wait(epfd, events, maxevents, timeout); + struct list_head *lsthead = &file->f_ep_links; + struct eventpoll *ep; + struct epitem *epi; /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. + * We don't want to get "file->f_ep_lock" because it is not + * necessary. It is not necessary because we're in the "struct file" + * cleanup path, and this means that noone is using this file anymore. + * So, for example, epoll_ctl() cannot hit here sicne if we reach this + * point, the file counter already went to zero and fget() would fail. + * The only hit might come from ep_free() but by holding the mutex + * will correctly serialize the operation. We do need to acquire + * "ep->mtx" after "epmutex" because ep_remove() requires it when called + * from anywhere but ep_free(). */ - if (sigmask) { - if (error == -EINTR) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); - } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } - - return error; -} - -#endif /* #ifdef TIF_RESTORE_SIGMASK */ - - -/* - * Creates the file descriptor to be used by the epoll interface. 
- */ -static int ep_getfd(int *efd, struct inode **einode, struct file **efile, - struct eventpoll *ep) -{ - struct qstr this; - char name[32]; - struct dentry *dentry; - struct inode *inode; - struct file *file; - int error, fd; + mutex_lock(&epmutex); - /* Get an ready to use file */ - error = -ENFILE; - file = get_empty_filp(); - if (!file) - goto eexit_1; + while (!list_empty(lsthead)) { + epi = list_first_entry(lsthead, struct epitem, fllink); - /* Allocates an inode from the eventpoll file system */ - inode = ep_eventpoll_inode(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - goto eexit_2; + ep = epi->ep; + list_del_init(&epi->fllink); + mutex_lock(&ep->mtx); + ep_remove(ep, epi); + mutex_unlock(&ep->mtx); } - /* Allocates a free descriptor to plug the file onto */ - error = get_unused_fd(); - if (error < 0) - goto eexit_3; - fd = error; - - /* - * Link the inode to a directory entry by creating a unique name - * using the inode number. - */ - error = -ENOMEM; - sprintf(name, "[%lu]", inode->i_ino); - this.name = name; - this.len = strlen(name); - this.hash = inode->i_ino; - dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this); - if (!dentry) - goto eexit_4; - dentry->d_op = &eventpollfs_dentry_operations; - d_add(dentry, inode); - file->f_path.mnt = mntget(eventpoll_mnt); - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - - file->f_pos = 0; - file->f_flags = O_RDONLY; - file->f_op = &eventpoll_fops; - file->f_mode = FMODE_READ; - file->f_version = 0; - file->private_data = ep; - - /* Install the new setup file into the allocated fd. 
*/ - fd_install(fd, file); - - *efd = fd; - *einode = inode; - *efile = file; - return 0; - -eexit_4: - put_unused_fd(fd); -eexit_3: - iput(inode); -eexit_2: - put_filp(file); -eexit_1: - return error; + mutex_unlock(&epmutex); } - static int ep_alloc(struct eventpoll **pep) { struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); @@ -832,12 +558,13 @@ static int ep_alloc(struct eventpoll **pep) if (!ep) return -ENOMEM; - rwlock_init(&ep->lock); - init_rwsem(&ep->sem); + spin_lock_init(&ep->lock); + mutex_init(&ep->mtx); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; + ep->ovflist = EP_UNACTIVE_PTR; *pep = ep; @@ -846,65 +573,19 @@ static int ep_alloc(struct eventpoll **pep) return 0; } - -static void ep_free(struct eventpoll *ep) -{ - struct rb_node *rbp; - struct epitem *epi; - - /* We need to release all tasks waiting for these file */ - if (waitqueue_active(&ep->poll_wait)) - ep_poll_safewake(&psw, &ep->poll_wait); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() while we're freeing the "struct eventpoll". - * We do not need to hold "ep->sem" here because the epoll file - * is on the way to be removed and no one has references to it - * anymore. The only hit might come from eventpoll_release_file() but - * holding "epmutex" is sufficent here. - */ - mutex_lock(&epmutex); - - /* - * Walks through the whole tree by unregistering poll callbacks. - */ - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { - epi = rb_entry(rbp, struct epitem, rbn); - - ep_unregister_pollwait(ep, epi); - } - - /* - * Walks through the whole hash by freeing each "struct epitem". At this - * point we are sure no poll callbacks will be lingering around, and also by - * write-holding "sem" we can be sure that no file cleanup code will hit - * us during this operation. So we can avoid the lock on "ep->lock". 
- */ - while ((rbp = rb_first(&ep->rbr)) != 0) { - epi = rb_entry(rbp, struct epitem, rbn); - ep_remove(ep, epi); - } - - mutex_unlock(&epmutex); -} - - /* - * Search the file inside the eventpoll hash. It add usage count to - * the returned item, so the caller must call ep_release_epitem() - * after finished using the "struct epitem". + * Search the file inside the eventpoll tree. The RB tree operations + * are protected by the "mtx" mutex, and ep_find() must be called with + * "mtx" held. */ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; - unsigned long flags; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); - read_lock_irqsave(&ep->lock, flags); for (rbp = ep->rbr.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); @@ -913,12 +594,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) else if (kcmp < 0) rbp = rbp->rb_left; else { - ep_use_epitem(epi); epir = epi; break; } } - read_unlock_irqrestore(&ep->lock, flags); DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", current, file, epir)); @@ -926,30 +605,72 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) return epir; } - /* - * Increment the usage count of the "struct epitem" making it sure - * that the user will have a valid pointer to reference. + * This is the callback that is passed to the wait queue wakeup + * machanism. It is called by the stored file descriptors when they + * have events to report. 
*/ -static void ep_use_epitem(struct epitem *epi) +static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) { + int pwake = 0; + unsigned long flags; + struct epitem *epi = ep_item_from_wait(wait); + struct eventpoll *ep = epi->ep; - atomic_inc(&epi->usecnt); -} + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", + current, epi->ffd.file, epi, ep)); + spin_lock_irqsave(&ep->lock, flags); -/* - * Decrement ( release ) the usage count by signaling that the user - * has finished using the structure. It might lead to freeing the - * structure itself if the count goes to zero. - */ -static void ep_release_epitem(struct epitem *epi) -{ + /* + * If the event mask does not contain any poll(2) event, we consider the + * descriptor to be disabled. This condition is likely the effect of the + * EPOLLONESHOT bit that disables the descriptor when an event is received, + * until the next EPOLL_CTL_MOD will be issued. + */ + if (!(epi->event.events & ~EP_PRIVATE_BITS)) + goto out_unlock; - if (atomic_dec_and_test(&epi->usecnt)) - kmem_cache_free(epi_cache, epi); -} + /* + * If we are trasfering events to userspace, we can hold no locks + * (because we're accessing user memory, and because of linux f_op->poll() + * semantics). All the events that happens during that period of time are + * chained in ep->ovflist and requeued later on. + */ + if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (epi->next == EP_UNACTIVE_PTR) { + epi->next = ep->ovflist; + ep->ovflist = epi; + } + goto out_unlock; + } + /* If this file is already in the ready list we exit soon */ + if (ep_is_linked(&epi->rdllink)) + goto is_linked; + + list_add_tail(&epi->rdllink, &ep->rdllist); + +is_linked: + /* + * Wake up ( if active ) both the eventpoll wait list and the ->poll() + * wait list. 
+ */ + if (waitqueue_active(&ep->wq)) + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); + if (waitqueue_active(&ep->poll_wait)) + pwake++; + +out_unlock: + spin_unlock_irqrestore(&ep->lock, flags); + + /* We have to call this outside the lock */ + if (pwake) + ep_poll_safewake(&psw, &ep->poll_wait); + + return 1; +} /* * This is the callback that is used to add our wait queue to the @@ -974,7 +695,6 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, } } - static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; @@ -994,7 +714,9 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } - +/* + * Must be called with "mtx" held. + */ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct file *tfile, int fd) { @@ -1005,19 +727,18 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, error = -ENOMEM; if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) - goto eexit_1; + goto error_return; /* Item initialization follow here ... */ ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); - INIT_LIST_HEAD(&epi->txlink); INIT_LIST_HEAD(&epi->pwqlist); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; - atomic_set(&epi->usecnt, 1); epi->nwait = 0; + epi->next = EP_UNACTIVE_PTR; /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1026,7 +747,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has - * been increased by the caller of this function. + * been increased by the caller of this function. Note that after + * this operation completes, the poll callback can start hitting + * the new item. 
*/ revents = tfile->f_op->poll(tfile, &epq.pt); @@ -1036,19 +759,22 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * high memory pressure. */ if (epi->nwait < 0) - goto eexit_2; + goto error_unregister; /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_ep_lock); list_add_tail(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_ep_lock); - /* We have to drop the new item inside our item list to keep track of it */ - write_lock_irqsave(&ep->lock, flags); - - /* Add the current item to the rb-tree */ + /* + * Add the current item to the RB tree. All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ ep_rbtree_insert(ep, epi); + /* We have to drop the new item inside our item list to keep track of it */ + spin_lock_irqsave(&ep->lock, flags); + /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1060,7 +786,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, pwake++; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -1071,27 +797,28 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, return 0; -eexit_2: +error_unregister: ep_unregister_pollwait(ep, epi); /* * We need to do this because an event could have been arrived on some - * allocated wait queue. + * allocated wait queue. Note that we don't care about the ep->ovflist + * list, since that is used/cleaned only inside a section bound by "mtx". + * And ep_insert() is called with "mtx" held. 
*/ - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) - ep_list_del(&epi->rdllink); - write_unlock_irqrestore(&ep->lock, flags); + list_del_init(&epi->rdllink); + spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); -eexit_1: +error_return: return error; } - /* * Modify the interest event mask by dropping an event if the new mask - * has a match in the current file status. + * has a match in the current file status. Must be called with "mtx" held. */ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) { @@ -1113,36 +840,28 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); /* Copy the data member from inside the lock */ epi->event.data = event->data; /* - * If the item is not linked to the hash it means that it's on its - * way toward the removal. Do nothing in this case. + * If the item is "hot" and it is not registered inside the ready + * list, push it inside. */ - if (ep_rb_linked(&epi->rbn)) { - /* - * If the item is "hot" and it is not registered inside the ready - * list, push it inside. If the item is not "hot" and it is currently - * registered inside the ready list, unlink it. 
- */ - if (revents & event->events) { - if (!ep_is_linked(&epi->rdllink)) { - list_add_tail(&epi->rdllink, &ep->rdllist); - - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } + if (revents & event->events) { + if (!ep_is_linked(&epi->rdllink)) { + list_add_tail(&epi->rdllink, &ep->rdllist); + + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); + if (waitqueue_active(&ep->poll_wait)) + pwake++; } } - - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -1151,350 +870,113 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even return 0; } - -/* - * This function unregister poll callbacks from the associated file descriptor. - * Since this must be called without holding "ep->lock" the atomic exchange trick - * will protect us from multiple unregister. - */ -static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) -{ - int nwait; - struct list_head *lsthead = &epi->pwqlist; - struct eppoll_entry *pwq; - - /* This is called without locks, so we need the atomic exchange */ - nwait = xchg(&epi->nwait, 0); - - if (nwait) { - while (!list_empty(lsthead)) { - pwq = list_entry(lsthead->next, struct eppoll_entry, llink); - - ep_list_del(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); - kmem_cache_free(pwq_cache, pwq); - } - } -} - - -/* - * Unlink the "struct epitem" from all places it might have been hooked up. - * This function must be called with write IRQ lock on "ep->lock". 
- */ -static int ep_unlink(struct eventpoll *ep, struct epitem *epi) +static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents) { - int error; - - /* - * It can happen that this one is called for an item already unlinked. - * The check protect us from doing a double unlink ( crash ). - */ - error = -ENOENT; - if (!ep_rb_linked(&epi->rbn)) - goto eexit_1; - - /* - * Clear the event mask for the unlinked item. This will avoid item - * notifications to be sent after the unlink operation from inside - * the kernel->userspace event transfer loop. - */ - epi->event.events = 0; - - /* - * At this point is safe to do the job, unlink the item from our rb-tree. - * This operation togheter with the above check closes the door to - * double unlinks. - */ - ep_rb_erase(&epi->rbn, &ep->rbr); - - /* - * If the item we are going to remove is inside the ready file descriptors - * we want to remove it from this list to avoid stale events. - */ - if (ep_is_linked(&epi->rdllink)) - ep_list_del(&epi->rdllink); - - error = 0; -eexit_1: - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", - current, ep, epi->ffd.file, error)); - - return error; -} - - -/* - * Removes a "struct epitem" from the eventpoll hash and deallocates - * all the associated resources. - */ -static int ep_remove(struct eventpoll *ep, struct epitem *epi) -{ - int error; - unsigned long flags; - struct file *file = epi->ffd.file; - - /* - * Removes poll wait queue hooks. We _have_ to do this without holding - * the "ep->lock" otherwise a deadlock might occur. This because of the - * sequence of the lock acquisition. Here we do "ep->lock" then the wait - * queue head lock when unregistering the wait queue. The wakeup callback - * will run by holding the wait queue head lock and will call our callback - * that will try to get "ep->lock". 
- */ - ep_unregister_pollwait(ep, epi); - - /* Remove the current item from the list of epoll hooks */ - spin_lock(&file->f_ep_lock); - if (ep_is_linked(&epi->fllink)) - ep_list_del(&epi->fllink); - spin_unlock(&file->f_ep_lock); - - /* We need to acquire the write IRQ lock before calling ep_unlink() */ - write_lock_irqsave(&ep->lock, flags); - - /* Really unlink the item from the hash */ - error = ep_unlink(ep, epi); - - write_unlock_irqrestore(&ep->lock, flags); - - if (error) - goto eexit_1; - - /* At this point it is safe to free the eventpoll item */ - ep_release_epitem(epi); - - error = 0; -eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", - current, ep, file, error)); - - return error; -} - - -/* - * This is the callback that is passed to the wait queue wakeup - * machanism. It is called by the stored file descriptors when they - * have events to report. - */ -static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - int pwake = 0; + int eventcnt, error = -EFAULT, pwake = 0; + unsigned int revents; unsigned long flags; - struct epitem *epi = ep_item_from_wait(wait); - struct eventpoll *ep = epi->ep; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", - current, epi->ffd.file, epi, ep)); + struct epitem *epi, *nepi; + struct list_head txlist; - write_lock_irqsave(&ep->lock, flags); + INIT_LIST_HEAD(&txlist); /* - * If the event mask does not contain any poll(2) event, we consider the - * descriptor to be disabled. This condition is likely the effect of the - * EPOLLONESHOT bit that disables the descriptor when an event is received, - * until the next EPOLL_CTL_MOD will be issued. + * We need to lock this because we could be hit by + * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). 
*/ - if (!(epi->event.events & ~EP_PRIVATE_BITS)) - goto is_disabled; + mutex_lock(&ep->mtx); - /* If this file is already in the ready list we exit soon */ - if (ep_is_linked(&epi->rdllink)) - goto is_linked; - - list_add_tail(&epi->rdllink, &ep->rdllist); - -is_linked: /* - * Wake up ( if active ) both the eventpoll wait list and the ->poll() - * wait list. + * Steal the ready list, and re-init the original one to the + * empty list. Also, set ep->ovflist to NULL so that events + * happening while looping w/out locks, are not lost. We cannot + * have the poll callback to queue directly on ep->rdllist, + * because we are doing it in the loop below, in a lockless way. */ - if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - -is_disabled: - write_unlock_irqrestore(&ep->lock, flags); - - /* We have to call this outside the lock */ - if (pwake) - ep_poll_safewake(&psw, &ep->poll_wait); - - return 1; -} - - -static int ep_eventpoll_close(struct inode *inode, struct file *file) -{ - struct eventpoll *ep = file->private_data; - - if (ep) { - ep_free(ep); - kfree(ep); - } - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); - return 0; -} - - -static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) -{ - unsigned int pollflags = 0; - unsigned long flags; - struct eventpoll *ep = file->private_data; - - /* Insert inside our poll wait queue */ - poll_wait(file, &ep->poll_wait, wait); - - /* Check our condition */ - read_lock_irqsave(&ep->lock, flags); - if (!list_empty(&ep->rdllist)) - pollflags = POLLIN | POLLRDNORM; - read_unlock_irqrestore(&ep->lock, flags); - - return pollflags; -} - - -/* - * Since we have to release the lock during the __copy_to_user() operation and - * during the f_op->poll() call, we try to collect the maximum number of items - * by reducing the irqlock/irqunlock switching rate. 
- */ -static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist, int maxevents) -{ - int nepi; - unsigned long flags; - struct list_head *lsthead = &ep->rdllist, *lnk; - struct epitem *epi; - - write_lock_irqsave(&ep->lock, flags); - - for (nepi = 0, lnk = lsthead->next; lnk != lsthead && nepi < maxevents;) { - epi = list_entry(lnk, struct epitem, rdllink); - - lnk = lnk->next; - - /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->txlink)) { - /* - * This is initialized in this way so that the default - * behaviour of the reinjecting code will be to push back - * the item inside the ready list. - */ - epi->revents = epi->event.events; - - /* Link the ready item into the transfer list */ - list_add(&epi->txlink, txlist); - nepi++; - - /* - * Unlink the item from the ready list. - */ - ep_list_del(&epi->rdllink); - } - } - - write_unlock_irqrestore(&ep->lock, flags); - - return nepi; -} - - -/* - * This function is called without holding the "ep->lock" since the call to - * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ - * because of the way poll() is traditionally implemented in Linux. - */ -static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, - struct epoll_event __user *events) -{ - int eventcnt = 0; - unsigned int revents; - struct list_head *lnk; - struct epitem *epi; + spin_lock_irqsave(&ep->lock, flags); + list_splice(&ep->rdllist, &txlist); + INIT_LIST_HEAD(&ep->rdllist); + ep->ovflist = NULL; + spin_unlock_irqrestore(&ep->lock, flags); /* * We can loop without lock because this is a task private list. - * The test done during the collection loop will guarantee us that - * another task will not try to collect this file. Also, items - * cannot vanish during the loop because we are holding "sem". + * We just splice'd out the ep->rdllist in ep_collect_ready_items(). + * Items cannot vanish during the loop because we are holding "mtx". 
*/ - list_for_each(lnk, txlist) { - epi = list_entry(lnk, struct epitem, txlink); + for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { + epi = list_first_entry(&txlist, struct epitem, rdllink); + + list_del_init(&epi->rdllink); /* * Get the ready file event set. We can safely use the file - * because we are holding the "sem" in read and this will - * guarantee that both the file and the item will not vanish. + * because we are holding the "mtx" and this will guarantee + * that both the file and the item will not vanish. */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); + revents &= epi->event.events; /* - * Set the return event set for the current file descriptor. - * Note that only the task task was successfully able to link - * the item to its "txlist" will write this field. + * Is the event mask intersect the caller-requested one, + * deliver the event to userspace. Again, we are holding + * "mtx", so no operations coming from userspace can change + * the item. */ - epi->revents = revents & epi->event.events; - - if (epi->revents) { - if (__put_user(epi->revents, + if (revents) { + if (__put_user(revents, &events[eventcnt].events) || __put_user(epi->event.data, &events[eventcnt].data)) - return -EFAULT; + goto errxit; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; eventcnt++; } + /* + * At this point, noone can insert into ep->rdllist besides + * us. The epoll_ctl() callers are locked out by us holding + * "mtx" and the poll callback will queue them in ep->ovflist. + */ + if (!(epi->event.events & EPOLLET) && + (revents & epi->event.events)) + list_add_tail(&epi->rdllink, &ep->rdllist); } - return eventcnt; -} - - -/* - * Walk through the transfer list we collected with ep_collect_ready_items() - * and, if 1) the item is still "alive" 2) its event set is not empty 3) it's - * not already linked, links it to the ready list. Same as above, we are holding - * "sem" so items cannot vanish underneath our nose. 
- */ -static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) -{ - int ricnt = 0, pwake = 0; - unsigned long flags; - struct epitem *epi; - - write_lock_irqsave(&ep->lock, flags); - - while (!list_empty(txlist)) { - epi = list_entry(txlist->next, struct epitem, txlink); + error = 0; - /* Unlink the current item from the transfer list */ - ep_list_del(&epi->txlink); +errxit: - /* - * If the item is no more linked to the interest set, we don't - * have to push it inside the ready list because the following - * ep_release_epitem() is going to drop it. Also, if the current - * item is set to have an Edge Triggered behaviour, we don't have - * to push it back either. - */ - if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) && - (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) { + spin_lock_irqsave(&ep->lock, flags); + /* + * During the time we spent in the loop above, some other events + * might have been queued by the poll callback. We re-insert them + * here (in case they are not already queued, or they're one-shot). + */ + for (nepi = ep->ovflist; (epi = nepi) != NULL; + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + if (!ep_is_linked(&epi->rdllink) && + (epi->event.events & ~EP_PRIVATE_BITS)) list_add_tail(&epi->rdllink, &ep->rdllist); - ricnt++; - } } + /* + * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after + * releasing the lock, events will be queued in the normal way inside + * ep->rdllist. + */ + ep->ovflist = EP_UNACTIVE_PTR; - if (ricnt) { + /* + * In case of error in the event-send loop, or in case the number of + * ready events exceeds the userspace limit, we need to splice the + * "txlist" back inside ep->rdllist. + */ + list_splice(&txlist, &ep->rdllist); + + if (!list_empty(&ep->rdllist)) { /* - * Wake up ( if active ) both the eventpoll wait list and the ->poll() - * wait list. 
+ * Wake up (if active) both the eventpoll wait list and the ->poll() + * wait list (delayed after we release the lock). */ if (waitqueue_active(&ep->wq)) __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | @@ -1502,47 +984,17 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) if (waitqueue_active(&ep->poll_wait)) pwake++; } + spin_unlock_irqrestore(&ep->lock, flags); - write_unlock_irqrestore(&ep->lock, flags); + mutex_unlock(&ep->mtx); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(&psw, &ep->poll_wait); -} - -/* - * Perform the transfer of events to user space. - */ -static int ep_events_transfer(struct eventpoll *ep, - struct epoll_event __user *events, int maxevents) -{ - int eventcnt = 0; - struct list_head txlist; - - INIT_LIST_HEAD(&txlist); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). - */ - down_read(&ep->sem); - - /* Collect/extract ready items */ - if (ep_collect_ready_items(ep, &txlist, maxevents) > 0) { - /* Build result set in userspace */ - eventcnt = ep_send_events(ep, &txlist, events); - - /* Reinject ready items into the ready list */ - ep_reinject_items(ep, &txlist); - } - - up_read(&ep->sem); - - return eventcnt; + return eventcnt == 0 ? error: eventcnt; } - static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { @@ -1560,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; retry: - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); res = 0; if (list_empty(&ep->rdllist)) { @@ -1570,6 +1022,7 @@ retry: * ep_poll_callback() when events will become available. 
*/ init_waitqueue_entry(&wait, current); + wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(&ep->wq, &wait); for (;;) { @@ -1586,9 +1039,9 @@ retry: break; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); jtimeout = schedule_timeout(jtimeout); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1598,7 +1051,7 @@ retry: /* Is it worth to try to dig for events ? */ eavail = !list_empty(&ep->rdllist); - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* * Try to transfer events to user space. In case we get 0 events and @@ -1606,61 +1059,263 @@ retry: * more luck. */ if (!res && eavail && - !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && jtimeout) goto retry; return res; } - -static int eventpollfs_delete_dentry(struct dentry *dentry) +/* + * It opens an eventpoll file descriptor. The "size" parameter is there + * for historical reasons, when epoll was using an hash instead of an + * RB tree. With the current implementation, the "size" parameter is ignored + * (besides sanity checks). + */ +asmlinkage long sys_epoll_create(int size) { + int error, fd = -1; + struct eventpoll *ep; + struct inode *inode; + struct file *file; - return 1; + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", + current, size)); + + /* + * Sanity check on the size parameter, and create the internal data + * structure ( "struct eventpoll" ). + */ + error = -EINVAL; + if (size <= 0 || (error = ep_alloc(&ep)) != 0) + goto error_return; + + /* + * Creates all the items needed to setup an eventpoll file. That is, + * a file structure, and inode and a free file descriptor. 
+ */ + error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", + &eventpoll_fops, ep); + if (error) + goto error_free; + + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", + current, size, fd)); + + return fd; + +error_free: + ep_free(ep); +error_return: + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", + current, size, error)); + return error; } +/* + * The following function implements the controller interface for + * the eventpoll file that enables the insertion/removal/change of + * file descriptors inside the interest set. + */ +asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, + struct epoll_event __user *event) +{ + int error; + struct file *file, *tfile; + struct eventpoll *ep; + struct epitem *epi; + struct epoll_event epds; + + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", + current, epfd, op, fd, event)); -static struct inode *ep_eventpoll_inode(void) + error = -EFAULT; + if (ep_op_has_event(op) && + copy_from_user(&epds, event, sizeof(struct epoll_event))) + goto error_return; + + /* Get the "struct file *" for the eventpoll file */ + error = -EBADF; + file = fget(epfd); + if (!file) + goto error_return; + + /* Get the "struct file *" for the target file */ + tfile = fget(fd); + if (!tfile) + goto error_fput; + + /* The target file descriptor must support poll */ + error = -EPERM; + if (!tfile->f_op || !tfile->f_op->poll) + goto error_tgt_fput; + + /* + * We have to check that the file structure underneath the file descriptor + * the user passed to us _is_ an eventpoll file. And also we do not permit + * adding an epoll file descriptor inside itself. + */ + error = -EINVAL; + if (file == tfile || !is_file_epoll(file)) + goto error_tgt_fput; + + /* + * At this point it is safe to assume that the "private_data" contains + * our own data structure. 
+ */ + ep = file->private_data; + + mutex_lock(&ep->mtx); + + /* + * Try to lookup the file inside our RB tree, Since we grabbed "mtx" + * above, we can be sure to be able to use the item looked up by + * ep_find() till we release the mutex. + */ + epi = ep_find(ep, tfile, fd); + + error = -EINVAL; + switch (op) { + case EPOLL_CTL_ADD: + if (!epi) { + epds.events |= POLLERR | POLLHUP; + + error = ep_insert(ep, &epds, tfile, fd); + } else + error = -EEXIST; + break; + case EPOLL_CTL_DEL: + if (epi) + error = ep_remove(ep, epi); + else + error = -ENOENT; + break; + case EPOLL_CTL_MOD: + if (epi) { + epds.events |= POLLERR | POLLHUP; + error = ep_modify(ep, epi, &epds); + } else + error = -ENOENT; + break; + } + mutex_unlock(&ep->mtx); + +error_tgt_fput: + fput(tfile); +error_fput: + fput(file); +error_return: + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", + current, epfd, op, fd, event, error)); + + return error; +} + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_wait(2). 
+ */ +asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout) { - int error = -ENOMEM; - struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); + int error; + struct file *file; + struct eventpoll *ep; + + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", + current, epfd, events, maxevents, timeout)); + + /* The maximum number of event must be greater than zero */ + if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) + return -EINVAL; - if (!inode) - goto eexit_1; + /* Verify that the area passed by the user is writeable */ + if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { + error = -EFAULT; + goto error_return; + } + + /* Get the "struct file *" for the eventpoll file */ + error = -EBADF; + file = fget(epfd); + if (!file) + goto error_return; - inode->i_fop = &eventpoll_fops; + /* + * We have to check that the file structure underneath the fd + * the user passed to us _is_ an eventpoll file. + */ + error = -EINVAL; + if (!is_file_epoll(file)) + goto error_fput; /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. + * At this point it is safe to assume that the "private_data" contains + * our own data structure. */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; - -eexit_1: - return ERR_PTR(error); + ep = file->private_data; + + /* Time to fish for events ... 
*/ + error = ep_poll(ep, events, maxevents, timeout); + +error_fput: + fput(file); +error_return: + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", + current, epfd, events, maxevents, timeout, error)); + + return error; } +#ifdef TIF_RESTORE_SIGMASK -static int -eventpollfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_pwait(2). + */ +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, const sigset_t __user *sigmask, + size_t sigsetsize) { - return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC, - mnt); + int error; + sigset_t ksigmask, sigsaved; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + if (sigmask) { + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + error = sys_epoll_wait(epfd, events, maxevents, timeout); + + /* + * If we changed the signal mask, we need to restore the original one. + * In case we've got a signal while waiting, we do not restore the + * signal mask yet, and we allow do_signal() to deliver the signal on + * the way back to userspace, before the signal mask is restored. 
+ */ + if (sigmask) { + if (error == -EINTR) { + memcpy(&current->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_thread_flag(TIF_RESTORE_SIGMASK); + } else + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + } + + return error; } +#endif /* #ifdef TIF_RESTORE_SIGMASK */ static int __init eventpoll_init(void) { - int error; - mutex_init(&epmutex); /* Initialize the structure used to perform safe poll wait head wake ups */ @@ -1676,39 +1331,7 @@ static int __init eventpoll_init(void) sizeof(struct eppoll_entry), 0, EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL); - /* - * Register the virtual file system that will be the source of inodes - * for the eventpoll files - */ - error = register_filesystem(&eventpoll_fs_type); - if (error) - goto epanic; - - /* Mount the above commented virtual file system */ - eventpoll_mnt = kern_mount(&eventpoll_fs_type); - error = PTR_ERR(eventpoll_mnt); - if (IS_ERR(eventpoll_mnt)) - goto epanic; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n", - current)); return 0; - -epanic: - panic("eventpoll_init() failed\n"); } +fs_initcall(eventpoll_init); - -static void __exit eventpoll_exit(void) -{ - /* Undo all operations done inside eventpoll_init() */ - unregister_filesystem(&eventpoll_fs_type); - mntput(eventpoll_mnt); - kmem_cache_destroy(pwq_cache); - kmem_cache_destroy(epi_cache); -} - -module_init(eventpoll_init); -module_exit(eventpoll_exit); - -MODULE_LICENSE("GPL"); diff --git a/fs/exec.c b/fs/exec.c index 3155e915307..f20561ff452 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -50,6 +50,7 @@ #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> +#include <linux/signalfd.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -59,7 +60,7 @@ #endif int core_uses_pid; -char core_pattern[128] = "core"; +char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; EXPORT_SYMBOL(suid_dumpable); @@ -100,6 +101,7 @@ int unregister_binfmt(struct linux_binfmt * fmt) while (*tmp) { if (fmt ==
*tmp) { *tmp = fmt->next; + fmt->next = NULL; write_unlock(&binfmt_lock); return 0; } @@ -132,6 +134,9 @@ asmlinkage long sys_uselib(const char __user * library) if (error) goto out; + error = -EACCES; + if (nd.mnt->mnt_flags & MNT_NOEXEC) + goto exit; error = -EINVAL; if (!S_ISREG(nd.dentry->d_inode->i_mode)) goto exit; @@ -581,6 +586,13 @@ static int de_thread(struct task_struct *tsk) int count; /* + * Tell all the sighand listeners that this sighand has + * been detached. The signalfd_detach() function grabs the + * sighand lock, if signal listeners are present on the sighand. + */ + signalfd_detach(tsk); + + /* * If we don't share sighandlers, then we aren't sharing anything * and we can just re-use it all. */ @@ -701,7 +713,7 @@ static int de_thread(struct task_struct *tsk) */ detach_pid(tsk, PIDTYPE_PID); tsk->pid = leader->pid; - attach_pid(tsk, PIDTYPE_PID, tsk->pid); + attach_pid(tsk, PIDTYPE_PID, find_pid(tsk->pid)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); list_replace_rcu(&leader->tasks, &tsk->tasks); @@ -756,8 +768,7 @@ no_thread_group: spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); - if (atomic_dec_and_test(&oldsighand->count)) - kmem_cache_free(sighand_cachep, oldsighand); + __cleanup_sighand(oldsighand); } BUG_ON(!thread_group_leader(tsk)); @@ -982,33 +993,51 @@ void compute_creds(struct linux_binprm *bprm) task_unlock(current); security_bprm_post_apply_creds(bprm); } - EXPORT_SYMBOL(compute_creds); +/* + * Arguments are '\0' separated strings found at the location bprm->p + * points to; chop off the first by relocating brpm->p to right after + * the first '\0' encountered. 
+ */ void remove_arg_zero(struct linux_binprm *bprm) { if (bprm->argc) { - unsigned long offset; - char * kaddr; - struct page *page; + char ch; + + do { + unsigned long offset; + unsigned long index; + char *kaddr; + struct page *page; - offset = bprm->p % PAGE_SIZE; - goto inside; + offset = bprm->p & ~PAGE_MASK; + index = bprm->p >> PAGE_SHIFT; - while (bprm->p++, *(kaddr+offset++)) { - if (offset != PAGE_SIZE) - continue; - offset = 0; - kunmap_atomic(kaddr, KM_USER0); -inside: - page = bprm->page[bprm->p/PAGE_SIZE]; + page = bprm->page[index]; kaddr = kmap_atomic(page, KM_USER0); - } - kunmap_atomic(kaddr, KM_USER0); + + /* run through page until we reach end or find NUL */ + do { + ch = *(kaddr + offset); + + /* discard that character... */ + bprm->p++; + offset++; + } while (offset < PAGE_SIZE && ch != '\0'); + + kunmap_atomic(kaddr, KM_USER0); + + /* free the old page */ + if (offset == PAGE_SIZE) { + __free_page(page); + bprm->page[index] = NULL; + } + } while (ch != '\0'); + bprm->argc--; } } - EXPORT_SYMBOL(remove_arg_zero); /* @@ -1238,8 +1267,6 @@ int set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); -#define CORENAME_MAX_SIZE 64 - /* format_corename will inspect the pattern parameter, and output a * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 
@@ -1469,6 +1496,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) int flag = 0; int ispipe = 0; + audit_core_dumps(signr); + binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 93e77c3d249..e98f6cd7200 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -2,7 +2,6 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/module.h> -#include <linux/smp_lock.h> #include <linux/namei.h> struct export_operations export_op_default; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 1d1e7e30d70..2bf49d7ef84 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -23,7 +23,6 @@ #include "ext2.h" #include <linux/pagemap.h> -#include <linux/smp_lock.h> typedef struct ext2_dir_entry_2 ext2_dirent; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e2a0ea50af1..9fd0ec5ba0d 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -133,6 +133,7 @@ extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); +extern void ext2_get_inode_flags(struct ext2_inode_info *); /* ioctl.c */ extern int ext2_ioctl (struct inode *, struct file *, unsigned int, diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index 7806b9e8155..fc66c93fcb5 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -23,7 +23,6 @@ */ #include "ext2.h" -#include <linux/smp_lock.h> #include <linux/buffer_head.h> /* for sync_mapping_buffers() */ diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index dd4e14c221e..0079b2cd531 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1055,6 +1055,25 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ +void ext2_get_inode_flags(struct ext2_inode_info *ei) +{ + unsigned int flags = ei->vfs_inode.i_flags; + + 
ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| + EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT2_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT2_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT2_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT2_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT2_DIRSYNC_FL; +} + void ext2_read_inode (struct inode * inode) { struct ext2_inode_info *ei = EXT2_I(inode); @@ -1079,9 +1098,9 @@ void ext2_read_inode (struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. 
@@ -1188,6 +1207,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync) if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); + ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 4b099d31071..e85c4821823 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -27,6 +27,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, switch (cmd) { case EXT2_IOC_GETFLAGS: + ext2_get_inode_flags(ei); flags = ei->i_flags & EXT2_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT2_IOC_SETFLAGS: { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 685a1c28717..5de5061eb33 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -160,13 +160,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - rwlock_init(&ei->i_meta_lock); + rwlock_init(&ei->i_meta_lock); #ifdef CONFIG_EXT2_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) @@ -1040,6 +1038,15 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); + ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset + EXT2_MOUNT_XIP if not */ + + if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { + printk("XIP: Unsupported blocksize\n"); + err = -EINVAL; + goto restore_opts; + } + es = sbi->s_es; if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != (old_mount_opt & EXT2_MOUNT_XIP)) && diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index a2661279847..eaa23d2d521 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -6,7 +6,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext2_fs.h> #include <linux/security.h> #include "xattr.h" diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index f28a6a499c9..83ee149f353 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext2_fs.h> #include "xattr.h" diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 665adee99b3..852869840f2 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -25,7 +25,6 @@ #include <linux/jbd.h> #include <linux/ext3_fs.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/rbtree.h> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a5b150f7e8a..2a85ddee474 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -27,7 +27,6 @@ #include <linux/time.h> #include <linux/ext3_jbd.h> #include <linux/jbd.h> -#include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -1768,7 +1767,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, struct inode *inode = mapping->host; struct buffer_head *bh; int err = 0; - void *kaddr; blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); @@ -1780,10 +1778,7 @@ static int 
ext3_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode) && PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); set_page_dirty(page); goto unlock; } @@ -1836,11 +1831,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - + zero_user_page(page, offset, length, KM_USER0); BUFFER_TRACE(bh, "zeroed end of block"); err = 0; @@ -2581,6 +2572,25 @@ void ext3_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +/* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ +void ext3_get_inode_flags(struct ext3_inode_info *ei) +{ + unsigned int flags = ei->vfs_inode.i_flags; + + ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| + EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT3_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT3_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT3_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT3_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT3_DIRSYNC_FL; +} + void ext3_read_inode(struct inode * inode) { struct ext3_iloc iloc; @@ -2608,9 +2618,9 @@ void ext3_read_inode(struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); + 
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_state = 0; @@ -2667,8 +2677,10 @@ void ext3_read_inode(struct inode * inode) */ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT3_INODE_SIZE(inode->i_sb)) + EXT3_INODE_SIZE(inode->i_sb)) { + brelse (bh); goto bad_inode; + } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ ei->i_extra_isize = sizeof(struct ext3_inode) - @@ -2736,6 +2748,7 @@ static int ext3_do_update_inode(handle_t *handle, if (ei->i_state & EXT3_STATE_NEW) memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); + ext3_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 9b8090d94e6..965006dba6b 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -28,6 +28,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, switch (cmd) { case EXT3_IOC_GETFLAGS: + ext3_get_inode_flags(ei); flags = ei->i_flags & EXT3_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT3_IOC_SETFLAGS: { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 49159f13cc1..9bb046df827 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -36,7 +36,6 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> -#include <linux/smp_lock.h> #include "namei.h" #include "xattr.h" @@ -969,6 +968,7 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, (block<<EXT3_BLOCK_SIZE_BITS(sb)) +((char *)de - bh->b_data))) { brelse (bh); + *err = ERR_BAD_DX_DIR; goto errout; } *res_dir = de; @@ -1134,9 +1134,9 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split; struct 
ext3_dir_entry_2 *de = NULL, *de2; - int err; + int err = 0; - bh2 = ext3_append (handle, dir, &newblock, error); + bh2 = ext3_append (handle, dir, &newblock, &err); if (!(bh2)) { brelse(*bh); *bh = NULL; @@ -1145,14 +1145,9 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, BUFFER_TRACE(*bh, "get_write_access"); err = ext3_journal_get_write_access(handle, *bh); - if (err) { - journal_error: - brelse(*bh); - brelse(bh2); - *bh = NULL; - ext3_std_error(dir->i_sb, err); - goto errout; - } + if (err) + goto journal_error; + BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); if (err) @@ -1195,8 +1190,16 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, goto journal_error; brelse (bh2); dxtrace(dx_show_index ("frame", frame->entries)); -errout: return de; + +journal_error: + brelse(*bh); + brelse(bh2); + *bh = NULL; + ext3_std_error(dir->i_sb, err); +errout: + *error = err; + return NULL; } #endif diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index ecf89904c11..2c97e09c6c6 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -11,7 +11,6 @@ #define EXT3FS_DEBUG -#include <linux/smp_lock.h> #include <linux/ext3_jbd.h> #include <linux/errno.h> diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 54d3c904125..6e3062913a9 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -466,14 +466,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT3_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ext3/xattr_security.c 
b/fs/ext3/xattr_security.c index b9c40c15647..821efaf2b94 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -6,7 +6,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext3_jbd.h> #include <linux/ext3_fs.h> #include <linux/security.h> diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index 86d91f1186d..0327497a55c 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext3_jbd.h> #include <linux/ext3_fs.h> #include "xattr.h" diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index a85a0a17c4f..1abd8f92c44 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -8,7 +8,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext3_jbd.h> #include <linux/ext3_fs.h> #include "xattr.h" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8a23483ca8d..3b64bb16c72 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -30,15 +30,15 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned long *blockgrpp, ext4_grpblk_t *offsetp) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_grpblk_t offset; - blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); if (offsetp) *offsetp = offset; if (blockgrpp) - *blockgrpp = blocknr; + *blockgrpp = blocknr; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index da80368b66f..e8ad06e2831 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -25,7 +25,6 @@ #include <linux/jbd2.h> #include <linux/ext4_fs.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include 
<linux/rbtree.h> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7916b50f9a1..b9ce2412907 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -34,7 +34,6 @@ #include <linux/time.h> #include <linux/ext4_jbd2.h> #include <linux/jbd.h> -#include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -375,7 +374,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc le32_to_cpu(ix[-1].ei_block)); } BUG_ON(k && le32_to_cpu(ix->ei_block) - <= le32_to_cpu(ix[-1].ei_block)); + <= le32_to_cpu(ix[-1].ei_block)); if (block < le32_to_cpu(ix->ei_block)) break; chix = ix; @@ -424,8 +423,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) path->p_ext = l - 1; ext_debug(" -> %d:%llu:%d ", - le32_to_cpu(path->p_ext->ee_block), - ext_pblock(path->p_ext), + le32_to_cpu(path->p_ext->ee_block), + ext_pblock(path->p_ext), le16_to_cpu(path->p_ext->ee_len)); #ifdef CHECK_BINSEARCH @@ -436,7 +435,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) chex = ex = EXT_FIRST_EXTENT(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { BUG_ON(k && le32_to_cpu(ex->ee_block) - <= le32_to_cpu(ex[-1].ee_block)); + <= le32_to_cpu(ex[-1].ee_block)); if (block < le32_to_cpu(ex->ee_block)) break; chex = ex; @@ -578,7 +577,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max)); + > le16_to_cpu(curp->p_hdr->eh_max)); BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); err = ext4_ext_dirty(handle, inode, curp); @@ -622,12 +621,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, border = path[depth].p_ext[1].ee_block; ext_debug("leaf will be split." 
" next leaf starts at %d\n", - le32_to_cpu(border)); + le32_to_cpu(border)); } else { border = newext->ee_block; ext_debug("leaf will be added." " next leaf starts at %d\n", - le32_to_cpu(border)); + le32_to_cpu(border)); } /* @@ -685,9 +684,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, while (path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)) { ext_debug("move %d:%llu:%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext_pblock(path[depth].p_ext), - le16_to_cpu(path[depth].p_ext->ee_len), + le32_to_cpu(path[depth].p_ext->ee_block), + ext_pblock(path[depth].p_ext), + le16_to_cpu(path[depth].p_ext->ee_len), newblock); /*memmove(ex++, path[depth].p_ext++, sizeof(struct ext4_extent)); @@ -766,9 +765,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT_LAST_INDEX(path[i].p_hdr)); while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ext_debug("%d: move %d:%d in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - idx_pblock(path[i].p_idx), - newblock); + le32_to_cpu(path[i].p_idx->ei_block), + idx_pblock(path[i].p_idx), + newblock); /*memmove(++fidx, path[i].p_idx++, sizeof(struct ext4_extent_idx)); neh->eh_entries++; @@ -1129,6 +1128,55 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, } /* + * check if a portion of the "newext" extent overlaps with an + * existing extent. + * + * If there is an overlap discovered, it updates the length of the newext + * such that there will be no overlap, and then returns 1. + * If there is no overlap found, it returns 0. 
+ */ +unsigned int ext4_ext_check_overlap(struct inode *inode, + struct ext4_extent *newext, + struct ext4_ext_path *path) +{ + unsigned long b1, b2; + unsigned int depth, len1; + unsigned int ret = 0; + + b1 = le32_to_cpu(newext->ee_block); + len1 = le16_to_cpu(newext->ee_len); + depth = ext_depth(inode); + if (!path[depth].p_ext) + goto out; + b2 = le32_to_cpu(path[depth].p_ext->ee_block); + + /* + * get the next allocated block if the extent in the path + * is before the requested block(s) + */ + if (b2 < b1) { + b2 = ext4_ext_next_allocated_block(path); + if (b2 == EXT_MAX_BLOCK) + goto out; + } + + /* check for wrap through zero */ + if (b1 + len1 < b1) { + len1 = EXT_MAX_BLOCK - b1; + newext->ee_len = cpu_to_le16(len1); + ret = 1; + } + + /* check for overlap */ + if (b1 + len1 > b2) { + newext->ee_len = cpu_to_le16(b2 - b1); + ret = 1; + } +out: + return ret; +} + +/* * ext4_ext_insert_extent: * tries to merge requsted extent into the existing extent or * inserts requested extent as new one into the tree, @@ -1213,12 +1261,12 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ ext_debug("first extent in the leaf: %d:%llu:%d\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len)); + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) - > le32_to_cpu(nearex->ee_block)) { + > le32_to_cpu(nearex->ee_block)) { /* BUG_ON(newext->ee_block == nearex->ee_block); */ if (nearex != EXT_LAST_EXTENT(eh)) { len = EXT_MAX_EXTENT(eh) - nearex; @@ -1226,9 +1274,9 @@ has_space: len = len < 0 ? 
0 : len; ext_debug("insert %d:%llu:%d after: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", - le32_to_cpu(newext->ee_block), - ext_pblock(newext), - le16_to_cpu(newext->ee_len), + le32_to_cpu(newext->ee_block), + ext_pblock(newext), + le16_to_cpu(newext->ee_len), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); } @@ -1359,9 +1407,9 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, cbex.ec_start = 0; cbex.ec_type = EXT4_EXT_CACHE_GAP; } else { - cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = le16_to_cpu(ex->ee_len); - cbex.ec_start = ext_pblock(ex); + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_start = ext_pblock(ex); cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } @@ -1432,16 +1480,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, len = le32_to_cpu(ex->ee_block) - block; ext_debug("cache gap(before): %lu [%lu:%lu]", (unsigned long) block, - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len)); + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len)); } else if (block >= le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len)) { - lblock = le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len); + + le16_to_cpu(ex->ee_len)) { + lblock = le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len); len = ext4_ext_next_allocated_block(path); ext_debug("cache gap(after): [%lu:%lu] %lu", - (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len), (unsigned long) block); BUG_ON(len == lblock); len = len - lblock; @@ -1469,9 +1517,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { - ex->ee_block = 
cpu_to_le32(cex->ec_block); + ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); + ex->ee_len = cpu_to_le16(cex->ec_len); ext_debug("%lu cached by %lu:%lu:%llu\n", (unsigned long) block, (unsigned long) cex->ec_block, @@ -1957,9 +2005,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* we should allocate requested block */ } else if (goal == EXT4_EXT_CACHE_EXTENT) { /* block is already allocated */ - newblock = iblock - - le32_to_cpu(newex.ee_block) - + ext_pblock(&newex); + newblock = iblock + - le32_to_cpu(newex.ee_block) + + ext_pblock(&newex); /* number of remaining blocks in the extent */ allocated = le16_to_cpu(newex.ee_len) - (iblock - le32_to_cpu(newex.ee_block)); @@ -1988,7 +2036,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ex = path[depth].p_ext; if (ex) { - unsigned long ee_block = le32_to_cpu(ex->ee_block); + unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len = le16_to_cpu(ex->ee_len); @@ -2001,7 +2049,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (ee_len > EXT_MAX_LEN) goto out2; /* if found extent covers block, simply return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { + if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); @@ -2032,7 +2080,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* allocate new block */ goal = ext4_ext_find_goal(inode, path, iblock); - allocated = max_blocks; + + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ + newex.ee_block = cpu_to_le32(iblock); + newex.ee_len = cpu_to_le16(max_blocks); + err = ext4_ext_check_overlap(inode, &newex, path); + if (err) + allocated = le16_to_cpu(newex.ee_len); + else + allocated = max_blocks; 
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); if (!newblock) goto out2; @@ -2040,12 +2096,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goal, newblock, allocated); /* try to insert new extent into found leaf and return */ - newex.ee_block = cpu_to_le32(iblock); ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(allocated); err = ext4_ext_insert_extent(handle, inode, path, &newex); - if (err) + if (err) { + /* free data blocks we just allocated */ + ext4_free_blocks(handle, inode, ext_pblock(&newex), + le16_to_cpu(newex.ee_len)); goto out2; + } if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = inode->i_size; @@ -2158,11 +2217,3 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } - -EXPORT_SYMBOL(ext4_mark_inode_dirty); -EXPORT_SYMBOL(ext4_ext_invalidate_cache); -EXPORT_SYMBOL(ext4_ext_insert_extent); -EXPORT_SYMBOL(ext4_ext_walk_space); -EXPORT_SYMBOL(ext4_ext_find_goal); -EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); - diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 810b6d6474b..8416fa28c42 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -27,7 +27,6 @@ #include <linux/time.h> #include <linux/ext4_jbd2.h> #include <linux/jbd2.h> -#include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -256,8 +255,8 @@ static int verify_chain(Indirect *from, Indirect *to) * @inode: inode in question (we are only interested in its superblock) * @i_block: block number to be parsed * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. 
* * To store the locations of file's data ext4 uses a data structure common * for UNIX filesystems - tree of pointers anchored in the inode, with @@ -2611,9 +2610,9 @@ void ext4_read_inode(struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_state = 0; @@ -2674,8 +2673,10 @@ void ext4_read_inode(struct inode * inode) */ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) + EXT4_INODE_SIZE(inode->i_sb)) { + brelse (bh); goto bad_inode; + } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. 
*/ ei->i_extra_isize = sizeof(struct ext4_inode) - diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e7e1d79a7d7..2811e5720ad 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -36,7 +36,6 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> -#include <linux/smp_lock.h> #include "namei.h" #include "xattr.h" @@ -47,7 +46,7 @@ */ #define NAMEI_RA_CHUNKS 2 #define NAMEI_RA_BLOCKS 4 -#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) static struct buffer_head *ext4_append(handle_t *handle, @@ -242,7 +241,7 @@ static inline unsigned dx_node_limit (struct inode *dir) static void dx_show_index (char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); - printk("%s index ", label); + printk("%s index ", label); for (i = 0; i < n; i++) { printk("%x->%u ", i? dx_get_hash(entries + i) : 0, dx_get_block(entries + i)); @@ -967,6 +966,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, (block<<EXT4_BLOCK_SIZE_BITS(sb)) +((char *)de - bh->b_data))) { brelse (bh); + *err = ERR_BAD_DX_DIR; goto errout; } *res_dir = de; @@ -1132,9 +1132,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split; struct ext4_dir_entry_2 *de = NULL, *de2; - int err; + int err = 0; - bh2 = ext4_append (handle, dir, &newblock, error); + bh2 = ext4_append (handle, dir, &newblock, &err); if (!(bh2)) { brelse(*bh); *bh = NULL; @@ -1143,14 +1143,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, BUFFER_TRACE(*bh, "get_write_access"); err = ext4_journal_get_write_access(handle, *bh); - if (err) { - journal_error: - brelse(*bh); - brelse(bh2); - *bh = NULL; - ext4_std_error(dir->i_sb, err); - goto errout; - } + if (err) + goto journal_error; + BUFFER_TRACE(frame->bh, "get_write_access"); err = 
ext4_journal_get_write_access(handle, frame->bh); if (err) @@ -1193,8 +1188,16 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, goto journal_error; brelse (bh2); dxtrace(dx_show_index ("frame", frame->entries)); -errout: return de; + +journal_error: + brelse(*bh); + brelse(bh2); + *bh = NULL; + ext4_std_error(dir->i_sb, err); +errout: + *error = err; + return NULL; } #endif diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ea99f6c97f5..aa11d7dbe97 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -11,7 +11,6 @@ #define EXT4FS_DEBUG -#include <linux/smp_lock.h> #include <linux/ext4_jbd2.h> #include <linux/errno.h> diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 71912693235..175b68c6096 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -517,14 +517,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT4DEV_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) @@ -1987,7 +1985,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR - "EXT4: failed to claim external journal device.\n"); + "EXT4: failed to claim external journal device.\n"); blkdev_put(bdev); return NULL; } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index b6a6861951f..f17eaf2321b 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -6,7 +6,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext4_jbd2.h> #include <linux/ext4_fs.h> #include <linux/security.h> diff --git 
a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index b76f2dbc82d..e0f05acdafe 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext4_jbd2.h> #include <linux/ext4_fs.h> #include "xattr.h" diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index c53cded0761..7ed3d8ebf09 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -8,7 +8,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/ext4_jbd2.h> #include <linux/ext4_fs.h> #include "xattr.h" diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 1959143c1d2..3c9c8a15ec7 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -40,8 +40,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct fat_cache *cache = (struct fat_cache *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - INIT_LIST_HEAD(&cache->cache_list); + INIT_LIST_HEAD(&cache->cache_list); } int __init fat_cache_init(void) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index c16af246d24..ccf161dffb6 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -422,7 +422,7 @@ EODir: EXPORT_SYMBOL_GPL(fat_search_long); struct fat_ioctl_filldir_callback { - struct dirent __user *dirent; + void __user *dirent; int result; /* for dir ioctl */ const char *longname; @@ -647,62 +647,85 @@ static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) return __fat_readdir(inode, filp, dirent, filldir, 0, 0); } -static int fat_ioctl_filldir(void *__buf, const char *name, int name_len, - loff_t offset, u64 ino, unsigned int d_type) +#define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ +static int func(void *__buf, const char *name, int name_len, \ + loff_t offset, u64 ino, unsigned int d_type) \ +{ \ + struct fat_ioctl_filldir_callback *buf = __buf; \ + struct dirent_type __user *d1 = buf->dirent; \ + struct 
dirent_type __user *d2 = d1 + 1; \ + \ + if (buf->result) \ + return -EINVAL; \ + buf->result++; \ + \ + if (name != NULL) { \ + /* dirent has only short name */ \ + if (name_len >= sizeof(d1->d_name)) \ + name_len = sizeof(d1->d_name) - 1; \ + \ + if (put_user(0, d2->d_name) || \ + put_user(0, &d2->d_reclen) || \ + copy_to_user(d1->d_name, name, name_len) || \ + put_user(0, d1->d_name + name_len) || \ + put_user(name_len, &d1->d_reclen)) \ + goto efault; \ + } else { \ + /* dirent has short and long name */ \ + const char *longname = buf->longname; \ + int long_len = buf->long_len; \ + const char *shortname = buf->shortname; \ + int short_len = buf->short_len; \ + \ + if (long_len >= sizeof(d1->d_name)) \ + long_len = sizeof(d1->d_name) - 1; \ + if (short_len >= sizeof(d1->d_name)) \ + short_len = sizeof(d1->d_name) - 1; \ + \ + if (copy_to_user(d2->d_name, longname, long_len) || \ + put_user(0, d2->d_name + long_len) || \ + put_user(long_len, &d2->d_reclen) || \ + put_user(ino, &d2->d_ino) || \ + put_user(offset, &d2->d_off) || \ + copy_to_user(d1->d_name, shortname, short_len) || \ + put_user(0, d1->d_name + short_len) || \ + put_user(short_len, &d1->d_reclen)) \ + goto efault; \ + } \ + return 0; \ +efault: \ + buf->result = -EFAULT; \ + return -EFAULT; \ +} + +FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) + +static int fat_ioctl_readdir(struct inode *inode, struct file *filp, + void __user *dirent, filldir_t filldir, + int short_only, int both) { - struct fat_ioctl_filldir_callback *buf = __buf; - struct dirent __user *d1 = buf->dirent; - struct dirent __user *d2 = d1 + 1; - - if (buf->result) - return -EINVAL; - buf->result++; - - if (name != NULL) { - /* dirent has only short name */ - if (name_len >= sizeof(d1->d_name)) - name_len = sizeof(d1->d_name) - 1; - - if (put_user(0, d2->d_name) || - put_user(0, &d2->d_reclen) || - copy_to_user(d1->d_name, name, name_len) || - put_user(0, d1->d_name + name_len) || - put_user(name_len, &d1->d_reclen)) - goto 
efault; - } else { - /* dirent has short and long name */ - const char *longname = buf->longname; - int long_len = buf->long_len; - const char *shortname = buf->shortname; - int short_len = buf->short_len; - - if (long_len >= sizeof(d1->d_name)) - long_len = sizeof(d1->d_name) - 1; - if (short_len >= sizeof(d1->d_name)) - short_len = sizeof(d1->d_name) - 1; - - if (copy_to_user(d2->d_name, longname, long_len) || - put_user(0, d2->d_name + long_len) || - put_user(long_len, &d2->d_reclen) || - put_user(ino, &d2->d_ino) || - put_user(offset, &d2->d_off) || - copy_to_user(d1->d_name, shortname, short_len) || - put_user(0, d1->d_name + short_len) || - put_user(short_len, &d1->d_reclen)) - goto efault; + struct fat_ioctl_filldir_callback buf; + int ret; + + buf.dirent = dirent; + buf.result = 0; + mutex_lock(&inode->i_mutex); + ret = -ENOENT; + if (!IS_DEADDIR(inode)) { + ret = __fat_readdir(inode, filp, &buf, filldir, + short_only, both); } - return 0; -efault: - buf->result = -EFAULT; - return -EFAULT; + mutex_unlock(&inode->i_mutex); + if (ret >= 0) + ret = buf.result; + return ret; } -static int fat_dir_ioctl(struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg) +static int fat_dir_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) { - struct fat_ioctl_filldir_callback buf; - struct dirent __user *d1; - int ret, short_only, both; + struct dirent __user *d1 = (struct dirent __user *)arg; + int short_only, both; switch (cmd) { case VFAT_IOCTL_READDIR_SHORT: @@ -717,7 +740,6 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, return fat_generic_ioctl(inode, filp, cmd, arg); } - d1 = (struct dirent __user *)arg; if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) return -EFAULT; /* @@ -728,69 +750,48 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, if (put_user(0, &d1->d_reclen)) return -EFAULT; - buf.dirent = d1; - buf.result = 0; - mutex_lock(&inode->i_mutex); - 
ret = -ENOENT; - if (!IS_DEADDIR(inode)) { - ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir, - short_only, both); - } - mutex_unlock(&inode->i_mutex); - if (ret >= 0) - ret = buf.result; - return ret; + return fat_ioctl_readdir(inode, filp, d1, fat_ioctl_filldir, + short_only, both); } #ifdef CONFIG_COMPAT #define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) #define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) -static long fat_compat_put_dirent32(struct dirent *d, - struct compat_dirent __user *d32) -{ - if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent))) - return -EFAULT; - - __put_user(d->d_ino, &d32->d_ino); - __put_user(d->d_off, &d32->d_off); - __put_user(d->d_reclen, &d32->d_reclen); - if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) - return -EFAULT; +FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent) - return 0; -} - -static long fat_compat_dir_ioctl(struct file *file, unsigned cmd, +static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { - struct compat_dirent __user *p = compat_ptr(arg); - int ret; - mm_segment_t oldfs = get_fs(); - struct dirent d[2]; + struct inode *inode = filp->f_path.dentry->d_inode; + struct compat_dirent __user *d1 = compat_ptr(arg); + int short_only, both; switch (cmd) { - case VFAT_IOCTL_READDIR_BOTH32: - cmd = VFAT_IOCTL_READDIR_BOTH; - break; case VFAT_IOCTL_READDIR_SHORT32: - cmd = VFAT_IOCTL_READDIR_SHORT; + short_only = 1; + both = 0; + break; + case VFAT_IOCTL_READDIR_BOTH32: + short_only = 0; + both = 1; break; default: return -ENOIOCTLCMD; } - set_fs(KERNEL_DS); - lock_kernel(); - ret = fat_dir_ioctl(file->f_path.dentry->d_inode, file, - cmd, (unsigned long) &d); - unlock_kernel(); - set_fs(oldfs); - if (ret >= 0) { - ret |= fat_compat_put_dirent32(&d[0], p); - ret |= fat_compat_put_dirent32(&d[1], p + 1); - } - return ret; + if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) + return -EFAULT; 
+ /* + * Yes, we don't need this put_user() absolutely. However old + * code didn't return the right value. So, app use this value, + * in order to check whether it is EOF. + */ + if (put_user(0, &d1->d_reclen)) + return -EFAULT; + + return fat_ioctl_readdir(inode, filp, d1, fat_compat_ioctl_filldir, + short_only, both); } #endif /* CONFIG_COMPAT */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 65cb54bde48..479722d8966 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -25,6 +25,7 @@ #include <linux/parser.h> #include <linux/uio.h> #include <linux/writeback.h> +#include <linux/log2.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -499,14 +500,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - spin_lock_init(&ei->cache_lru_lock); - ei->nr_caches = 0; - ei->cache_valid_id = FAT_CACHE_VALID + 1; - INIT_LIST_HEAD(&ei->cache_lru); - INIT_HLIST_NODE(&ei->i_fat_hash); - inode_init_once(&ei->vfs_inode); - } + spin_lock_init(&ei->cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = FAT_CACHE_VALID + 1; + INIT_LIST_HEAD(&ei->cache_lru); + INIT_HLIST_NODE(&ei->i_fat_hash); + inode_init_once(&ei->vfs_inode); } static int __init fat_init_inodecache(void) @@ -824,6 +823,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) } if (opts->name_check != 'n') seq_printf(m, ",check=%c", opts->name_check); + if (opts->usefree) + seq_puts(m, ",usefree"); if (opts->quiet) seq_puts(m, ",quiet"); if (opts->showexec) @@ -849,7 +850,7 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) enum { Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid, - Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_nocase, + Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable, Opt_dots, Opt_nodots, Opt_charset, 
Opt_shortname_lower, Opt_shortname_win95, @@ -871,6 +872,7 @@ static match_table_t fat_tokens = { {Opt_dmask, "dmask=%o"}, {Opt_fmask, "fmask=%o"}, {Opt_codepage, "codepage=%u"}, + {Opt_usefree, "usefree"}, {Opt_nocase, "nocase"}, {Opt_quiet, "quiet"}, {Opt_showexec, "showexec"}, @@ -950,7 +952,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; opts->utf8 = opts->unicode_xlate = 0; opts->numtail = 1; - opts->nocase = 0; + opts->usefree = opts->nocase = 0; *debug = 0; if (!options) @@ -978,6 +980,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_check_n: opts->name_check = 'n'; break; + case Opt_usefree: + opts->usefree = 1; + break; case Opt_nocase: if (!is_vfat) opts->nocase = 1; @@ -1217,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, } logical_sector_size = le16_to_cpu(get_unaligned((__le16 *)&b->sector_size)); - if (!logical_sector_size - || (logical_sector_size & (logical_sector_size - 1)) + if (!is_power_of_2(logical_sector_size) || (logical_sector_size < 512) || (PAGE_CACHE_SIZE < logical_sector_size)) { if (!silent) @@ -1228,8 +1232,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, goto out_invalid; } sbi->sec_per_clus = b->sec_per_clus; - if (!sbi->sec_per_clus - || (sbi->sec_per_clus & (sbi->sec_per_clus - 1))) { + if (!is_power_of_2(sbi->sec_per_clus)) { if (!silent) printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", sbi->sec_per_clus); @@ -1305,7 +1308,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, le32_to_cpu(fsinfo->signature2), sbi->fsinfo_sector); } else { - sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters); + if (sbi->options.usefree) + sbi->free_clusters = + le32_to_cpu(fsinfo->free_clusters); sbi->prev_free = le32_to_cpu(fsinfo->next_cluster); } diff --git a/fs/fifo.c b/fs/fifo.c index 49035b174b4..9785e36f81e 100644 
--- a/fs/fifo.c +++ b/fs/fifo.c @@ -11,8 +11,8 @@ #include <linux/mm.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/fs.h> +#include <linux/sched.h> #include <linux/pipe_fs_i.h> static void wait_for_partner(struct inode* inode, unsigned int *cnt) diff --git a/fs/file_table.c b/fs/file_table.c index 4c17a18d8c1..d17fd691b83 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -10,7 +10,6 @@ #include <linux/file.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/eventpoll.h> diff --git a/fs/filesystems.c b/fs/filesystems.c index 7a4f61aa05f..f37f8726283 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -41,11 +41,12 @@ void put_filesystem(struct file_system_type *fs) module_put(fs->owner); } -static struct file_system_type **find_filesystem(const char *name) +static struct file_system_type **find_filesystem(const char *name, unsigned len) { struct file_system_type **p; for (p=&file_systems; *p; p=&(*p)->next) - if (strcmp((*p)->name,name) == 0) + if (strlen((*p)->name) == len && + strncmp((*p)->name, name, len) == 0) break; return p; } @@ -68,11 +69,12 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; + BUG_ON(strchr(fs->name, '.')); if (fs->next) return -EBUSY; INIT_LIST_HEAD(&fs->fs_supers); write_lock(&file_systems_lock); - p = find_filesystem(fs->name); + p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else @@ -215,19 +217,26 @@ int get_filesystem_list(char * buf) struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; + const char *dot = strchr(name, '.'); + unsigned len = dot ? 
dot - name : strlen(name); read_lock(&file_systems_lock); - fs = *(find_filesystem(name)); + fs = *(find_filesystem(name, len)); if (fs && !try_module_get(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); - if (!fs && (request_module("%s", name) == 0)) { + if (!fs && (request_module("%.*s", len, name) == 0)) { read_lock(&file_systems_lock); - fs = *(find_filesystem(name)); + fs = *(find_filesystem(name, len)); if (fs && !try_module_get(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); } + + if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { + put_filesystem(fs); + fs = NULL; + } return fs; } diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c index 2d71128bd8d..f86fd3cacd5 100644 --- a/fs/freevxfs/vxfs_bmap.c +++ b/fs/freevxfs/vxfs_bmap.c @@ -137,7 +137,7 @@ vxfs_bmap_indir(struct inode *ip, long indir, int size, long block) bp = sb_bread(ip->i_sb, indir + (i / VXFS_TYPED_PER_BLOCK(ip->i_sb))); - if (!buffer_mapped(bp)) + if (!bp || !buffer_mapped(bp)) return 0; typ = ((struct vxfs_typed *)bp->b_data) + diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 098a915fd9a..d1f7c5b5b3c 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -99,7 +99,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino) offset = ((ino % (sbp->s_blocksize / VXFS_ISIZE)) * VXFS_ISIZE); bp = sb_bread(sbp, block); - if (buffer_mapped(bp)) { + if (bp && buffer_mapped(bp)) { struct vxfs_inode_info *vip; struct vxfs_dinode *dip; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8890eba1db5..bd5a772d8cc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -485,7 +485,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, static int fuse_create(struct inode *dir, struct dentry *entry, int mode, struct nameidata *nd) { - if (nd && (nd->flags & LOOKUP_CREATE)) { + if (nd && (nd->flags & LOOKUP_OPEN)) { int err = fuse_create_open(dir, entry, mode, nd); if (err != -ENOSYS) return err; diff --git 
a/fs/fuse/file.c b/fs/fuse/file.c index acfad65a6e8..adf7995232b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -11,6 +11,7 @@ #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/kernel.h> +#include <linux/sched.h> static const struct file_operations fuse_direct_io_file_operations; @@ -609,7 +610,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, ssize_t res; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = fuse_direct_io(file, buf, count, ppos, 1); + res = generic_write_checks(file, ppos, &count, 0); + if (!res) + res = fuse_direct_io(file, buf, count, ppos, 1); mutex_unlock(&inode->i_mutex); return res; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8003be56e0..cc5efc13496 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -17,6 +17,7 @@ #include <linux/parser.h> #include <linux/statfs.h> #include <linux/random.h> +#include <linux/sched.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -453,6 +454,7 @@ static const struct super_operations fuse_super_operations = { .destroy_inode = fuse_destroy_inode, .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, + .drop_inode = generic_delete_inode, .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, @@ -636,6 +638,7 @@ static int fuse_get_sb(struct file_system_type *fs_type, static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", + .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, .kill_sb = kill_anon_super, }; @@ -654,7 +657,7 @@ static struct file_system_type fuseblk_fs_type = { .name = "fuseblk", .get_sb = fuse_get_sb_blk, .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; static inline int register_fuseblk(void) @@ -685,8 +688,7 @@ static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep, { struct inode * 
inode = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(inode); + inode_init_once(inode); } static int __init fuse_fs_init(void) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 11477ca3a3c..b3e152db70c 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -10,6 +10,7 @@ #ifndef __GLOCK_DOT_H__ #define __GLOCK_DOT_H__ +#include <linux/sched.h> #include "incore.h" /* Flags for lock requests; used in gfs2_holder gh_flag field. diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 39c8ae23bd9..7b82657a991 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -163,10 +163,7 @@ static void inode_go_sync(struct gfs2_glock *gl) if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; int error = filemap_fdatawait(mapping); - if (error == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else if (error) - set_bit(AS_EIO, &mapping->flags); + mapping_set_error(mapping, error); } clear_bit(GLF_DIRTY, &gl->gl_flags); gfs2_ail_empty_gl(gl); diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c index 5cc1dfa7944..0d149c8c493 100644 --- a/fs/gfs2/locking/nolock/main.c +++ b/fs/gfs2/locking/nolock/main.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/lm_interface.h> struct nolock_lockspace { diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e460487c055..787a0edef10 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -27,29 +27,27 @@ static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_inode *ip = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ip->i_inode); - spin_lock_init(&ip->i_spin); - init_rwsem(&ip->i_rw_mutex); - memset(ip->i_cache, 0, sizeof(ip->i_cache)); - } + + inode_init_once(&ip->i_inode); + spin_lock_init(&ip->i_spin); + init_rwsem(&ip->i_rw_mutex); + memset(ip->i_cache, 0, sizeof(ip->i_cache)); } static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, 
unsigned long flags) { struct gfs2_glock *gl = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_HLIST_NODE(&gl->gl_list); - spin_lock_init(&gl->gl_spin); - INIT_LIST_HEAD(&gl->gl_holders); - INIT_LIST_HEAD(&gl->gl_waiters1); - INIT_LIST_HEAD(&gl->gl_waiters3); - gl->gl_lvb = NULL; - atomic_set(&gl->gl_lvb_count, 0); - INIT_LIST_HEAD(&gl->gl_reclaim); - INIT_LIST_HEAD(&gl->gl_ail_list); - atomic_set(&gl->gl_ail_count, 0); - } + + INIT_HLIST_NODE(&gl->gl_list); + spin_lock_init(&gl->gl_spin); + INIT_LIST_HEAD(&gl->gl_holders); + INIT_LIST_HEAD(&gl->gl_waiters1); + INIT_LIST_HEAD(&gl->gl_waiters3); + gl->gl_lvb = NULL; + atomic_set(&gl->gl_lvb_count, 0); + INIT_LIST_HEAD(&gl->gl_reclaim); + INIT_LIST_HEAD(&gl->gl_ail_list); + atomic_set(&gl->gl_ail_count, 0); } /** diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index c6bac6b6942..a6fdc52f554 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -11,7 +11,6 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/lm_interface.h> diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 329c4dcdecd..064df880458 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -15,7 +15,6 @@ #include <linux/uio.h> #include <linux/blkdev.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/gfs2_ondisk.h> #include <linux/ext2_fs.h> diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 5fd0ed71f92..8a3a650abc8 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -9,6 +9,7 @@ */ #include <linux/pagemap.h> +#include <linux/log2.h> #include "btree.h" @@ -76,7 +77,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke tree->depth = be16_to_cpu(head->depth); size = tree->node_size; - if (!size || size & (size - 1)) + if (!is_power_of_2(size)) goto fail_page; if (!tree->node_count) goto fail_page; diff --git 
a/fs/hfs/inode.c b/fs/hfs/inode.c index fafcba59387..9a934db0bd8 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -13,6 +13,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> +#include <linux/sched.h> #include "hfs_fs.h" #include "btree.h" diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4f1888f16cf..92cf8751e42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,8 +434,7 @@ static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flag { struct hfs_inode_info *i = p; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); + inode_init_once(&i->vfs_inode); } static int __init init_hfs_fs(void) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index a9b9e872e29..90ebab753d3 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/log2.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" @@ -69,7 +70,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) } size = tree->node_size; - if (!size || size & (size - 1)) + if (!is_power_of_2(size)) goto fail_page; if (!tree->node_count) goto fail_page; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 642012ac337..45dab5d6cc1 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/mpage.h> +#include <linux/sched.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 37afbec8a76..ebd1b380cbb 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -470,8 +470,7 @@ static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long { struct hfsplus_inode_info *i = p; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); + inode_init_once(&i->vfs_inode); } static int __init init_hfsplus_fs(void) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 70543b17e4c..06e5930515f 100644 --- 
a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -55,7 +55,7 @@ extern int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, int *nlink_out, int *uid_out, int *gid_out, unsigned long long *size_out, struct timespec *atime_out, struct timespec *mtime_out, struct timespec *ctime_out, - int *blksize_out, unsigned long long *blocks_out); + int *blksize_out, unsigned long long *blocks_out, int fd); extern int access_file(char *path, int r, int w, int x); extern int open_file(char *path, int r, int w, int append); extern int file_type(const char *path, int *maj, int *min); @@ -71,7 +71,7 @@ extern int lseek_file(int fd, long long offset, int whence); extern int fsync_file(int fd, int datasync); extern int file_create(char *name, int ur, int uw, int ux, int gr, int gw, int gx, int or, int ow, int ox); -extern int set_attr(const char *file, struct hostfs_iattr *attrs); +extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd); extern int make_symlink(const char *from, const char *to); extern int unlink_file(const char *file); extern int do_mkdir(const char *file, int mode); @@ -87,14 +87,3 @@ extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, long *spare_out); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fd301a91012..8286491dbf3 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL * * Ported the filesystem routines to 2.5. @@ -31,14 +31,14 @@ struct hostfs_inode_info { static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) { - return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); + return list_entry(inode, struct hostfs_inode_info, vfs_inode); } #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) int hostfs_d_delete(struct dentry *dentry) { - return(1); + return 1; } struct dentry_operations hostfs_dentry_ops = { @@ -79,7 +79,7 @@ static int __init hostfs_args(char *options, int *add) } options = ptr; } - return(0); + return 0; } __uml_setup("hostfs=", hostfs_args, @@ -110,7 +110,8 @@ static char *dentry_name(struct dentry *dentry, int extra) root = HOSTFS_I(parent->d_inode)->host_filename; len += strlen(root); name = kmalloc(len + extra + 1, GFP_KERNEL); - if(name == NULL) return(NULL); + if(name == NULL) + return NULL; name[len] = '\0'; parent = dentry; @@ -122,7 +123,7 @@ static char *dentry_name(struct dentry *dentry, int extra) parent = parent->d_parent; } strncpy(name, root, strlen(root)); - return(name); + return name; } static char *inode_name(struct inode *ino, int extra) @@ -130,7 +131,7 @@ static char *inode_name(struct inode *ino, int extra) struct dentry *dentry; dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); - return(dentry_name(dentry, extra)); + return dentry_name(dentry, extra); } static int read_name(struct inode *ino, char *name) @@ -147,16 +148,16 @@ static int read_name(struct inode 
*ino, char *name) err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, - &ino->i_ctime, &i_blksize, &i_blocks); + &ino->i_ctime, &i_blksize, &i_blocks, -1); if(err) - return(err); + return err; ino->i_ino = i_ino; ino->i_mode = i_mode; ino->i_nlink = i_nlink; ino->i_size = i_size; ino->i_blocks = i_blocks; - return(0); + return 0; } static char *follow_link(char *link) @@ -181,11 +182,11 @@ static char *follow_link(char *link) goto out_free; if(*name == '/') - return(name); + return name; end = strrchr(link, '/'); if(end == NULL) - return(name); + return name; *(end + 1) = '\0'; len = strlen(link) + strlen(name) + 1; @@ -199,12 +200,12 @@ static char *follow_link(char *link) sprintf(resolved, "%s%s", link, name); kfree(name); kfree(link); - return(resolved); + return resolved; out_free: kfree(name); out: - return(ERR_PTR(n)); + return ERR_PTR(n); } static int read_inode(struct inode *ino) @@ -234,7 +235,7 @@ static int read_inode(struct inode *ino) err = read_name(ino, name); kfree(name); out: - return(err); + return err; } int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) @@ -254,14 +255,15 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), &sf->f_namelen, sf->f_spare); - if(err) return(err); + if(err) + return err; sf->f_blocks = f_blocks; sf->f_bfree = f_bfree; sf->f_bavail = f_bavail; sf->f_files = f_files; sf->f_ffree = f_ffree; sf->f_type = HOSTFS_SUPER_MAGIC; - return(0); + return 0; } static struct inode *hostfs_alloc_inode(struct super_block *sb) @@ -270,13 +272,13 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) hi = kmalloc(sizeof(*hi), GFP_KERNEL); if(hi == NULL) - return(NULL); + return NULL; *hi = ((struct hostfs_inode_info) { .host_filename = NULL, .fd = -1, .mode = 0 }); inode_init_once(&hi->vfs_inode); - return(&hi->vfs_inode); + return 
&hi->vfs_inode; } static void hostfs_delete_inode(struct inode *inode) @@ -325,10 +327,12 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int error, len; name = dentry_name(file->f_path.dentry, 0); - if(name == NULL) return(-ENOMEM); + if(name == NULL) + return -ENOMEM; dir = open_dir(name, &error); kfree(name); - if(dir == NULL) return(-error); + if(dir == NULL) + return -error; next = file->f_pos; while((name = read_dir(dir, &next, &ino, &len)) != NULL){ error = (*filldir)(ent, name, len, file->f_pos, @@ -337,7 +341,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) file->f_pos = next; } close_dir(dir); - return(0); + return 0; } int hostfs_file_open(struct inode *ino, struct file *file) @@ -347,7 +351,7 @@ int hostfs_file_open(struct inode *ino, struct file *file) mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if((mode & HOSTFS_I(ino)->mode) == mode) - return(0); + return 0; /* The file may already have been opened, but with the wrong access, * so this resets things and reopens the file with the new access. 
@@ -367,14 +371,15 @@ int hostfs_file_open(struct inode *ino, struct file *file) name = dentry_name(file->f_path.dentry, 0); if(name == NULL) - return(-ENOMEM); + return -ENOMEM; fd = open_file(name, r, w, append); kfree(name); - if(fd < 0) return(fd); + if(fd < 0) + return fd; FILE_HOSTFS_I(file)->fd = fd; - return(0); + return 0; } int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) @@ -458,7 +463,7 @@ int hostfs_readpage(struct file *file, struct page *page) out: kunmap(page); unlock_page(page); - return(err); + return err; } int hostfs_prepare_write(struct file *file, struct page *page, @@ -485,7 +490,7 @@ int hostfs_prepare_write(struct file *file, struct page *page, err = 0; out: kunmap(page); - return(err); + return err; } int hostfs_commit_write(struct file *file, struct page *page, unsigned from, @@ -511,7 +516,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from, inode->i_size = start; kunmap(page); - return(err); + return err; } static const struct address_space_operations hostfs_aops = { @@ -569,7 +574,7 @@ static int init_inode(struct inode *inode, struct dentry *dentry) break; } out: - return(err); + return err; } int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, @@ -607,16 +612,16 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, HOSTFS_I(inode)->fd = fd; HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; d_instantiate(dentry, inode); - return(0); + return 0; out_put: iput(inode); out: - return(error); + return error; } struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + struct nameidata *nd) { struct inode *inode; char *name; @@ -647,44 +652,45 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, d_add(dentry, inode); dentry->d_op = &hostfs_dentry_ops; - return(NULL); + return NULL; out_put: iput(inode); out: - return(ERR_PTR(err)); + return ERR_PTR(err); } static char *inode_dentry_name(struct 
inode *ino, struct dentry *dentry) { - char *file; + char *file; int len; file = inode_name(ino, dentry->d_name.len + 1); - if(file == NULL) return(NULL); - strcat(file, "/"); + if(file == NULL) + return NULL; + strcat(file, "/"); len = strlen(file); - strncat(file, dentry->d_name.name, dentry->d_name.len); + strncat(file, dentry->d_name.name, dentry->d_name.len); file[len + dentry->d_name.len] = '\0'; - return(file); + return file; } int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) { - char *from_name, *to_name; - int err; + char *from_name, *to_name; + int err; - if((from_name = inode_dentry_name(ino, from)) == NULL) - return(-ENOMEM); - to_name = dentry_name(to, 0); + if((from_name = inode_dentry_name(ino, from)) == NULL) + return -ENOMEM; + to_name = dentry_name(to, 0); if(to_name == NULL){ kfree(from_name); - return(-ENOMEM); + return -ENOMEM; } - err = link_file(to_name, from_name); - kfree(from_name); - kfree(to_name); - return(err); + err = link_file(to_name, from_name); + kfree(from_name); + kfree(to_name); + return err; } int hostfs_unlink(struct inode *ino, struct dentry *dentry) @@ -692,13 +698,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if((file = inode_dentry_name(ino, dentry)) == NULL) + return -ENOMEM; if(append) - return(-EPERM); + return -EPERM; err = unlink_file(file); kfree(file); - return(err); + return err; } int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) @@ -706,10 +713,11 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if((file = inode_dentry_name(ino, dentry)) == NULL) + return -ENOMEM; err = make_symlink(file, to); kfree(file); - return(err); + return err; } int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) @@ -717,10 +725,11 @@ 
int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if((file = inode_dentry_name(ino, dentry)) == NULL) + return -ENOMEM; err = do_mkdir(file, mode); kfree(file); - return(err); + return err; } int hostfs_rmdir(struct inode *ino, struct dentry *dentry) @@ -728,10 +737,11 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if((file = inode_dentry_name(ino, dentry)) == NULL) + return -ENOMEM; err = do_rmdir(file); kfree(file); - return(err); + return err; } int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) @@ -764,14 +774,14 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) goto out_put; d_instantiate(dentry, inode); - return(0); + return 0; out_free: kfree(name); out_put: iput(inode); out: - return(err); + return err; } int hostfs_rename(struct inode *from_ino, struct dentry *from, @@ -781,15 +791,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, int err; if((from_name = inode_dentry_name(from_ino, from)) == NULL) - return(-ENOMEM); + return -ENOMEM; if((to_name = inode_dentry_name(to_ino, to)) == NULL){ kfree(from_name); - return(-ENOMEM); + return -ENOMEM; } err = rename_file(from_name, to_name); kfree(from_name); kfree(to_name); - return(err); + return err; } int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) @@ -801,7 +811,8 @@ int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) if (desired & MAY_WRITE) w = 1; if (desired & MAY_EXEC) x = 1; name = inode_name(ino, 0); - if (name == NULL) return(-ENOMEM); + if (name == NULL) + return -ENOMEM; if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) || S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode)) @@ -820,6 +831,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) 
char *name; int err; + int fd = HOSTFS_I(dentry->d_inode)->fd; + err = inode_change_ok(dentry->d_inode, attr); if (err) return err; @@ -863,20 +876,21 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; } name = dentry_name(dentry, 0); - if(name == NULL) return(-ENOMEM); - err = set_attr(name, &attrs); + if(name == NULL) + return -ENOMEM; + err = set_attr(name, &attrs, fd); kfree(name); if(err) - return(err); + return err; - return(inode_setattr(dentry->d_inode, attr)); + return inode_setattr(dentry->d_inode, attr); } int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { generic_fillattr(dentry->d_inode, stat); - return(0); + return 0; } static const struct inode_operations hostfs_iops = { @@ -915,7 +929,8 @@ int hostfs_link_readpage(struct file *file, struct page *page) buffer = kmap(page); name = inode_name(page->mapping->host, 0); - if(name == NULL) return(-ENOMEM); + if(name == NULL) + return -ENOMEM; err = do_readlink(name, buffer, PAGE_CACHE_SIZE); kfree(name); if(err == PAGE_CACHE_SIZE) @@ -928,7 +943,7 @@ int hostfs_link_readpage(struct file *file, struct page *page) } kunmap(page); unlock_page(page); - return(err); + return err; } static const struct address_space_operations hostfs_link_aops = { @@ -978,20 +993,20 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) err = read_inode(root_inode); if(err){ - /* No iput in this case because the dput does that for us */ - dput(sb->s_root); - sb->s_root = NULL; + /* No iput in this case because the dput does that for us */ + dput(sb->s_root); + sb->s_root = NULL; goto out; - } + } - return(0); + return 0; - out_put: - iput(root_inode); - out_free: +out_put: + iput(root_inode); +out_free: kfree(host_root_path); - out: - return(err); +out: + return err; } static int hostfs_read_sb(struct file_system_type *type, @@ -1011,7 +1026,7 @@ static struct file_system_type hostfs_type = { static int __init 
init_hostfs(void) { - return(register_filesystem(&hostfs_type)); + return register_filesystem(&hostfs_type); } static void __exit exit_hostfs(void) @@ -1022,14 +1037,3 @@ static void __exit exit_hostfs(void) module_init(init_hostfs) module_exit(exit_hostfs) MODULE_LICENSE("GPL"); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 1ed5ea389f1..5625e2481dd 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -21,12 +21,16 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, int *nlink_out, int *uid_out, int *gid_out, unsigned long long *size_out, struct timespec *atime_out, struct timespec *mtime_out, struct timespec *ctime_out, - int *blksize_out, unsigned long long *blocks_out) + int *blksize_out, unsigned long long *blocks_out, int fd) { struct stat64 buf; - if(lstat64(path, &buf) < 0) - return(-errno); + if(fd >= 0) { + if (fstat64(fd, &buf) < 0) + return -errno; + } else if(lstat64(path, &buf) < 0) { + return -errno; + } if(inode_out != NULL) *inode_out = buf.st_ino; if(mode_out != NULL) *mode_out = buf.st_mode; @@ -48,7 +52,7 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, } if(blksize_out != NULL) *blksize_out = buf.st_blksize; if(blocks_out != NULL) *blocks_out = buf.st_blocks; - return(0); + return 0; } int file_type(const char *path, int *maj, int *min) @@ -56,7 +60,7 @@ int file_type(const char *path, int *maj, int *min) 
struct stat64 buf; if(lstat64(path, &buf) < 0) - return(-errno); + return -errno; /*We cannot pass rdev as is because glibc and the kernel disagree *about its definition.*/ if(maj != NULL) @@ -64,13 +68,13 @@ int file_type(const char *path, int *maj, int *min) if(min != NULL) *min = minor(buf.st_rdev); - if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); - else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); - else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); - else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); - else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); - else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); - else return(OS_TYPE_FILE); + if(S_ISDIR(buf.st_mode)) return OS_TYPE_DIR; + else if(S_ISLNK(buf.st_mode)) return OS_TYPE_SYMLINK; + else if(S_ISCHR(buf.st_mode)) return OS_TYPE_CHARDEV; + else if(S_ISBLK(buf.st_mode)) return OS_TYPE_BLOCKDEV; + else if(S_ISFIFO(buf.st_mode))return OS_TYPE_FIFO; + else if(S_ISSOCK(buf.st_mode))return OS_TYPE_SOCK; + else return OS_TYPE_FILE; } int access_file(char *path, int r, int w, int x) @@ -80,8 +84,9 @@ int access_file(char *path, int r, int w, int x) if(r) mode = R_OK; if(w) mode |= W_OK; if(x) mode |= X_OK; - if(access(path, mode) != 0) return(-errno); - else return(0); + if(access(path, mode) != 0) + return -errno; + else return 0; } int open_file(char *path, int r, int w, int append) @@ -99,8 +104,9 @@ int open_file(char *path, int r, int w, int append) if(append) mode |= O_APPEND; fd = open64(path, mode); - if(fd < 0) return(-errno); - else return(fd); + if(fd < 0) + return -errno; + else return fd; } void *open_dir(char *path, int *err_out) @@ -109,8 +115,9 @@ void *open_dir(char *path, int *err_out) dir = opendir(path); *err_out = errno; - if(dir == NULL) return(NULL); - return(dir); + if(dir == NULL) + return NULL; + return dir; } char *read_dir(void *stream, unsigned long long *pos, @@ -121,11 +128,12 @@ char *read_dir(void *stream, unsigned long long *pos, seekdir(dir, *pos); ent = readdir(dir); 
- if(ent == NULL) return(NULL); + if(ent == NULL) + return NULL; *len_out = strlen(ent->d_name); *ino_out = ent->d_ino; *pos = telldir(dir); - return(ent->d_name); + return ent->d_name; } int read_file(int fd, unsigned long long *offset, char *buf, int len) @@ -133,9 +141,10 @@ int read_file(int fd, unsigned long long *offset, char *buf, int len) int n; n = pread64(fd, buf, len, *offset); - if(n < 0) return(-errno); + if(n < 0) + return -errno; *offset += n; - return(n); + return n; } int write_file(int fd, unsigned long long *offset, const char *buf, int len) @@ -143,9 +152,10 @@ int write_file(int fd, unsigned long long *offset, const char *buf, int len) int n; n = pwrite64(fd, buf, len, *offset); - if(n < 0) return(-errno); + if(n < 0) + return -errno; *offset += n; - return(n); + return n; } int lseek_file(int fd, long long offset, int whence) @@ -154,8 +164,8 @@ int lseek_file(int fd, long long offset, int whence) ret = lseek64(fd, offset, whence); if(ret < 0) - return(-errno); - return(0); + return -errno; + return 0; } int fsync_file(int fd, int datasync) @@ -198,65 +208,90 @@ int file_create(char *name, int ur, int uw, int ux, int gr, mode |= ox ? 
S_IXOTH : 0; fd = open64(name, O_CREAT | O_RDWR, mode); if(fd < 0) - return(-errno); - return(fd); + return -errno; + return fd; } -int set_attr(const char *file, struct hostfs_iattr *attrs) +int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) { - struct utimbuf buf; + struct timeval times[2]; + struct timespec atime_ts, mtime_ts; int err, ma; - if(attrs->ia_valid & HOSTFS_ATTR_MODE){ - if(chmod(file, attrs->ia_mode) != 0) return(-errno); - } - if(attrs->ia_valid & HOSTFS_ATTR_UID){ - if(chown(file, attrs->ia_uid, -1)) return(-errno); + if (attrs->ia_valid & HOSTFS_ATTR_MODE) { + if (fd >= 0) { + if (fchmod(fd, attrs->ia_mode) != 0) + return (-errno); + } else if (chmod(file, attrs->ia_mode) != 0) { + return -errno; + } } - if(attrs->ia_valid & HOSTFS_ATTR_GID){ - if(chown(file, -1, attrs->ia_gid)) return(-errno); + if (attrs->ia_valid & HOSTFS_ATTR_UID) { + if (fd >= 0) { + if (fchown(fd, attrs->ia_uid, -1)) + return -errno; + } else if(chown(file, attrs->ia_uid, -1)) { + return -errno; + } } - if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ - if(truncate(file, attrs->ia_size)) return(-errno); + if (attrs->ia_valid & HOSTFS_ATTR_GID) { + if (fd >= 0) { + if (fchown(fd, -1, attrs->ia_gid)) + return -errno; + } else if (chown(file, -1, attrs->ia_gid)) { + return -errno; + } } - ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; - if((attrs->ia_valid & ma) == ma){ - buf.actime = attrs->ia_atime.tv_sec; - buf.modtime = attrs->ia_mtime.tv_sec; - if(utime(file, &buf) != 0) return(-errno); + if (attrs->ia_valid & HOSTFS_ATTR_SIZE) { + if (fd >= 0) { + if (ftruncate(fd, attrs->ia_size)) + return -errno; + } else if (truncate(file, attrs->ia_size)) { + return -errno; + } } - else { - struct timespec ts; - - if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ - err = stat_file(file, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, &ts, NULL, NULL, NULL); - if(err != 0) - return(err); - buf.actime = attrs->ia_atime.tv_sec; - buf.modtime = ts.tv_sec; - if(utime(file, &buf) != 0) 
- return(-errno); + + /* Update accessed and/or modified time, in two parts: first set + * times according to the changes to perform, and then call futimes() + * or utimes() to apply them. */ + ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); + if (attrs->ia_valid & ma) { + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, + &atime_ts, &mtime_ts, NULL, NULL, NULL, fd); + if (err != 0) + return err; + + times[0].tv_sec = atime_ts.tv_sec; + times[0].tv_usec = atime_ts.tv_nsec * 1000; + times[1].tv_sec = mtime_ts.tv_sec; + times[1].tv_usec = mtime_ts.tv_nsec * 1000; + + if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { + times[0].tv_sec = attrs->ia_atime.tv_sec; + times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000; + } + if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) { + times[1].tv_sec = attrs->ia_mtime.tv_sec; + times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000; } - if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ - err = stat_file(file, NULL, NULL, NULL, NULL, NULL, - NULL, &ts, NULL, NULL, NULL, NULL); - if(err != 0) - return(err); - buf.actime = ts.tv_sec; - buf.modtime = attrs->ia_mtime.tv_sec; - if(utime(file, &buf) != 0) - return(-errno); + + if (fd >= 0) { + if (futimes(fd, times) != 0) + return -errno; + } else if (utimes(file, times) != 0) { + return -errno; } } + if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, &attrs->ia_atime, &attrs->ia_mtime, NULL, - NULL, NULL); - if(err != 0) return(err); + NULL, NULL, fd); + if(err != 0) + return err; } - return(0); + return 0; } int make_symlink(const char *from, const char *to) @@ -264,8 +299,9 @@ int make_symlink(const char *from, const char *to) int err; err = symlink(to, from); - if(err) return(-errno); - return(0); + if(err) + return -errno; + return 0; } int unlink_file(const char *file) @@ -273,8 +309,9 @@ int unlink_file(const char *file) int err; err = unlink(file); - if(err) return(-errno); 
- return(0); + if(err) + return -errno; + return 0; } int do_mkdir(const char *file, int mode) @@ -282,8 +319,9 @@ int do_mkdir(const char *file, int mode) int err; err = mkdir(file, mode); - if(err) return(-errno); - return(0); + if(err) + return -errno; + return 0; } int do_rmdir(const char *file) @@ -291,8 +329,9 @@ int do_rmdir(const char *file) int err; err = rmdir(file); - if(err) return(-errno); - return(0); + if(err) + return -errno; + return 0; } int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) @@ -300,8 +339,9 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) int err; err = mknod(file, mode, makedev(major, minor)); - if(err) return(-errno); - return(0); + if(err) + return -errno; + return 0; } int link_file(const char *to, const char *from) @@ -309,8 +349,9 @@ int link_file(const char *to, const char *from) int err; err = link(to, from); - if(err) return(-errno); - return(0); + if(err) + return -errno; + return 0; } int do_readlink(char *file, char *buf, int size) @@ -319,10 +360,10 @@ int do_readlink(char *file, char *buf, int size) n = readlink(file, buf, size); if(n < 0) - return(-errno); + return -errno; if(n < size) buf[n] = '\0'; - return(n); + return n; } int rename_file(char *from, char *to) @@ -330,8 +371,9 @@ int rename_file(char *from, char *to) int err; err = rename(from, to); - if(err < 0) return(-errno); - return(0); + if(err < 0) + return -errno; + return 0; } int do_statfs(char *root, long *bsize_out, long long *blocks_out, @@ -344,7 +386,9 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, int err; err = statfs64(root, &buf); - if(err < 0) return(-errno); + if(err < 0) + return -errno; + *bsize_out = buf.f_bsize; *blocks_out = buf.f_blocks; *bfree_out = buf.f_bfree; @@ -360,16 +404,5 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, spare_out[2] = buf.f_spare[2]; spare_out[3] = buf.f_spare[3]; spare_out[4] = buf.f_spare[4]; - return(0); 
+ return 0; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index b52b7381d10..b6fca543544 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -5,7 +5,7 @@ * * general buffer i/o */ - +#include <linux/sched.h> #include "hpfs_fn.h" void hpfs_lock_creation(struct super_block *s) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9953cf9a2f1..d256559b410 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -5,7 +5,7 @@ * * adding & removing files & directories */ - +#include <linux/sched.h> #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1b95f39fbc3..29cc34abb2e 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/statfs.h> #include <linux/magic.h> +#include <linux/sched.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ @@ -176,11 +177,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mutex_init(&ei->i_mutex); - mutex_init(&ei->i_parent_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->i_mutex); + mutex_init(&ei->i_parent_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 98959b87cdf..e6b46b3ac2f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -556,8 +556,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct 
hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } const struct file_operations hugetlbfs_file_operations = { @@ -737,15 +736,13 @@ static int can_do_hugetlb_shm(void) can_do_mlock()); } -struct file *hugetlb_zero_setup(size_t size) +struct file *hugetlb_file_setup(const char *name, size_t size) { int error = -ENOMEM; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; - char buf[16]; - static atomic_t counter; if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); @@ -757,8 +754,7 @@ struct file *hugetlb_zero_setup(size_t size) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; - snprintf(buf, 16, "%u", atomic_inc_return(&counter)); - quick_string.name = buf; + quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; dentry = d_alloc(root, &quick_string); diff --git a/fs/inode.c b/fs/inode.c index b4296bf6273..9a012cc5b6c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -213,8 +213,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct inode * inode = (struct inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(inode); + inode_init_once(inode); } /* @@ -250,7 +249,7 @@ void clear_inode(struct inode *inode) BUG_ON(inode->i_state & I_CLEAR); wait_on_inode(inode); DQUOT_DROP(inode); - if (inode->i_sb && inode->i_sb->s_op->clear_inode) + if (inode->i_sb->s_op->clear_inode) inode->i_sb->s_op->clear_inode(inode); if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); @@ -275,7 +274,7 @@ static void dispose_list(struct list_head *head) while (!list_empty(head)) { struct inode *inode; - inode = list_entry(head->next, struct inode, i_list); + inode = list_first_entry(head, struct inode, i_list); list_del(&inode->i_list); if (inode->i_data.nrpages) @@ -524,7 +523,12 @@ repeat: */ struct inode *new_inode(struct 
super_block *sb) { - static unsigned long last_ino; + /* + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. + */ + static unsigned int last_ino; struct inode * inode; spin_lock_prefetch(&inode_lock); @@ -683,27 +687,28 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) */ ino_t iunique(struct super_block *sb, ino_t max_reserved) { - static ino_t counter; + /* + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. + */ + static unsigned int counter; struct inode *inode; - struct hlist_head * head; + struct hlist_head *head; ino_t res; + spin_lock(&inode_lock); -retry: - if (counter > max_reserved) { - head = inode_hashtable + hash(sb,counter); + do { + if (counter <= max_reserved) + counter = max_reserved + 1; res = counter++; + head = inode_hashtable + hash(sb, res); inode = find_inode_fast(sb, head, res); - if (!inode) { - spin_unlock(&inode_lock); - return res; - } - } else { - counter = max_reserved + 1; - } - goto retry; - -} + } while (inode != NULL); + spin_unlock(&inode_lock); + return res; +} EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) @@ -1040,7 +1045,7 @@ static void generic_forget_inode(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_LOCK))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; - if (!sb || (sb->s_flags & MS_ACTIVE)) { + if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); return; } diff --git a/fs/inotify.c b/fs/inotify.c index f5099d86fd9..7457501b956 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -509,7 +509,7 @@ void inotify_destroy(struct inotify_handle *ih) mutex_unlock(&ih->mutex); break; } - watch = list_entry(watches->next, struct inotify_watch, h_list); + watch = list_first_entry(watches, struct inotify_watch, 
h_list); get_inotify_watch(watch); mutex_unlock(&ih->mutex); diff --git a/fs/internal.h b/fs/internal.h index ea00126c9a5..392e8ccd6fc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,8 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/ioctl32.h> - struct super_block; /* @@ -42,14 +40,6 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) extern void __init chrdev_init(void); /* - * compat_ioctl.c - */ -#ifdef CONFIG_COMPAT -extern struct ioctl_trans ioctl_start[]; -extern int ioctl_table_size; -#endif - -/* * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); diff --git a/fs/ioctl.c b/fs/ioctl.c index ff61772ceed..8c90cbc903f 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/module.h> +#include <linux/kallsyms.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -20,6 +21,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; + void *f; if (!filp->f_op) goto out; @@ -29,10 +31,16 @@ static long do_ioctl(struct file *filp, unsigned int cmd, if (error == -ENOIOCTLCMD) error = -EINVAL; goto out; - } else if (filp->f_op->ioctl) { + } else if ((f = filp->f_op->ioctl)) { lock_kernel(); - error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, - filp, cmd, arg); + if (!filp->f_op->ioctl) { + printk("%s: ioctl %p disappeared\n", __FUNCTION__, f); + print_symbol("symbol: %s\n", (unsigned long)f); + dump_stack(); + } else { + error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, + filp, cmd, arg); + } unlock_kernel(); } @@ -67,8 +75,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, return put_user(res, p); } case FIGETBSZ: - if (inode->i_sb == NULL) - return -EBADF; return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: return put_user(i_size_read(inode) - filp->f_pos, p); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 
e99f7ff4ecb..5c3eecf7542 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -77,8 +77,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct iso_inode_info *ei = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0208cc7ac5d..47552d4a632 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -1,5 +1,5 @@ /* - * linux/fs/checkpoint.c + * linux/fs/jbd/checkpoint.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index be4648bc7a2..1facfaff97c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -20,7 +20,6 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 10fff944393..46fe7439fb9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -28,7 +28,6 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/freezer.h> @@ -211,10 +210,16 @@ end_loop: return 0; } -static void journal_start_thread(journal_t *journal) +static int journal_start_thread(journal_t *journal) { - kthread_run(kjournald, journal, "kjournald"); + struct task_struct *t; + + t = kthread_run(kjournald, journal, "kjournald"); + if (IS_ERR(t)) + return PTR_ERR(t); + wait_event(journal->j_wait_done_commit, journal->j_task != 0); + return 0; } static void journal_kill_thread(journal_t *journal) @@ -840,8 +845,7 @@ static int journal_reset(journal_t *journal) /* Add the dynamic fields and write it to disk. 
*/ journal_update_superblock(journal, 1); - journal_start_thread(journal); - return 0; + return journal_start_thread(journal); } /** diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 11563fe2a52..2a5f4b833e3 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -1,5 +1,5 @@ /* - * linux/fs/recovery.c + * linux/fs/jbd/recovery.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index d204ab394f3..824e3b7d4ec 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -1,5 +1,5 @@ /* - * linux/fs/revoke.c + * linux/fs/jbd/revoke.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 * @@ -66,7 +66,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/smp_lock.h> #include <linux/init.h> #endif diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index cceaf57e377..772b6531a2a 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1,5 +1,5 @@ /* - * linux/fs/transaction.c + * linux/fs/jbd/transaction.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 * @@ -23,7 +23,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/timer.h> -#include <linux/smp_lock.h> #include <linux/mm.h> #include <linux/highmem.h> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 68039fa9a56..3fccde7ba00 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -1,5 +1,5 @@ /* - * linux/fs/checkpoint.c + * linux/fs/jbd2/checkpoint.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6bd8005e3d3..2856e1100a5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -20,7 +20,6 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 44fc32bfd7f..78d63b818f0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -28,7 +28,6 @@ #include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/freezer.h> @@ -211,10 +210,16 @@ end_loop: return 0; } -static void jbd2_journal_start_thread(journal_t *journal) +static int jbd2_journal_start_thread(journal_t *journal) { - kthread_run(kjournald2, journal, "kjournald2"); + struct task_struct *t; + + t = kthread_run(kjournald2, journal, "kjournald2"); + if (IS_ERR(t)) + return PTR_ERR(t); + wait_event(journal->j_wait_done_commit, journal->j_task != 0); + return 0; } static void journal_kill_thread(journal_t *journal) @@ -840,8 +845,7 @@ static int journal_reset(journal_t *journal) /* Add the dynamic fields and write it to disk. */ jbd2_journal_update_superblock(journal, 1); - jbd2_journal_start_thread(journal); - return 0; + return jbd2_journal_start_thread(journal); } /** diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9f10acafaf7..395c92a04ac 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -1,5 +1,5 @@ /* - * linux/fs/recovery.c + * linux/fs/jbd2/recovery.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f506646ad0f..9246e763da7 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -1,5 +1,5 @@ /* - * linux/fs/revoke.c + * linux/fs/jbd2/revoke.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 * @@ -66,7 +66,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/smp_lock.h> #include <linux/init.h> #endif diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 3a8700153cb..7946ff43fc4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1,5 +1,5 @@ /* - * linux/fs/transaction.c + * linux/fs/jbd2/transaction.c * * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1998 * @@ -23,7 +23,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/timer.h> -#include <linux/smp_lock.h> #include <linux/mm.h> #include <linux/highmem.h> diff --git a/fs/jffs2/histo_mips.h b/fs/jffs2/histo_mips.h deleted file mode 100644 index fa3dac19a10..00000000000 --- a/fs/jffs2/histo_mips.h +++ /dev/null @@ -1,2 +0,0 @@ -#define BIT_DIVIDER_MIPS 1043 -static int bits_mips[8] = { 277,249,290,267,229,341,212,241}; /* mips32 */ diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 6aff38930b5..7b363786c2d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -210,8 +210,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info * * offset, and the one with the smallest length will come first in the * ordering. * - * Returns 0 if the node was inserted - * 1 if the node is obsolete (because we can't mark it so yet) + * Returns 0 if the node was handled (including marking it obsolete) * < 0 an if error occurred */ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, @@ -219,9 +218,9 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) { uint32_t fn_end = tn->fn->ofs + tn->fn->size; - struct jffs2_tmp_dnode_info *insert_point = NULL, *this; + struct jffs2_tmp_dnode_info *this; - dbg_readinode("insert fragment %#04x-%#04x, ver %u\n", tn->fn->ofs, fn_end, tn->version); + dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); /* If a node has zero dsize, we only have to keep if it if it might be the node with highest version -- i.e. the one which will end up as f->metadata. @@ -229,9 +228,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, check anyway. 
*/ if (!tn->fn->size) { if (rii->mdata_tn) { - /* We had a candidate mdata node already */ - dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version); - jffs2_kill_tn(c, rii->mdata_tn); + if (rii->mdata_tn->version < tn->version) { + /* We had a candidate mdata node already */ + dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version); + jffs2_kill_tn(c, rii->mdata_tn); + } else { + dbg_readinode("kill new mdata with ver %d (older than existing %d\n", + tn->version, rii->mdata_tn->version); + jffs2_kill_tn(c, tn); + return 0; + } } rii->mdata_tn = tn; dbg_readinode("keep new mdata with ver %d\n", tn->version); @@ -240,23 +246,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, /* Find the earliest node which _may_ be relevant to this one */ this = jffs2_lookup_tn(&rii->tn_root, tn->fn->ofs); - if (!this) { - /* First addition to empty tree. $DEITY how I love the easy cases */ - rb_link_node(&tn->rb, NULL, &rii->tn_root.rb_node); - rb_insert_color(&tn->rb, &rii->tn_root); - dbg_readinode("keep new frag\n"); - return 0; - } - - /* If we add a new node it'll be somewhere under here. */ - insert_point = this; - - /* If the node is coincident with another at a lower address, - back up until the other node is found. It may be relevant */ - while (tn->overlapped) - tn = tn_prev(tn); + if (this) { + /* If the node is coincident with another at a lower address, + back up until the other node is found. It may be relevant */ + while (this->overlapped) + this = tn_prev(this); - dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); + /* First node should never be marked overlapped */ + BUG_ON(!this); + dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? 
"data" : "hole"); + } while (this) { if (this->fn->ofs > fn_end) @@ -274,11 +273,10 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } else { /* Who cares if the new one is good; keep it for now anyway. */ + dbg_readinode("Like new node. Throw away old\n"); rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); - /* Same overlapping from in front and behind */ - tn->overlapped = this->overlapped; jffs2_kill_tn(c, this); - dbg_readinode("Like new node. Throw away old\n"); + /* Same overlapping from in front and behind */ return 0; } } @@ -291,13 +289,8 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, jffs2_kill_tn(c, tn); return 0; } - /* ... and is good. Kill 'this'... */ - rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); - tn->overlapped = this->overlapped; - jffs2_kill_tn(c, this); - /* ... and any subsequent nodes which are also overlapped */ - this = tn_next(tn); - while (this && this->fn->ofs + this->fn->size < fn_end) { + /* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */ + while (this && this->fn->ofs + this->fn->size <= fn_end) { struct jffs2_tmp_dnode_info *next = tn_next(this); if (this->version < tn->version) { tn_erase(this, &rii->tn_root); @@ -308,8 +301,8 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, } this = next; } - dbg_readinode("Done inserting new\n"); - return 0; + dbg_readinode("Done killing overlapped nodes\n"); + continue; } if (this->version > tn->version && this->fn->ofs <= tn->fn->ofs && @@ -321,29 +314,21 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } /* ... but 'this' was bad. Replace it... */ - rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); dbg_readinode("Bad CRC on old overlapping node. 
Kill it\n"); + tn_erase(this, &rii->tn_root); jffs2_kill_tn(c, this); - return 0; + break; } - /* We want to be inserted under the last node which is - either at a lower offset _or_ has a smaller range */ - if (this->fn->ofs < tn->fn->ofs || - (this->fn->ofs == tn->fn->ofs && - this->fn->size <= tn->fn->size)) - insert_point = this; this = tn_next(this); } - dbg_readinode("insert_point %p, ver %d, 0x%x-0x%x, ov %d\n", - insert_point, insert_point->version, insert_point->fn->ofs, - insert_point->fn->ofs+insert_point->fn->size, - insert_point->overlapped); + /* We neither completely obsoleted nor were completely - obsoleted by an earlier node. Insert under insert_point */ + obsoleted by an earlier node. Insert into the tree */ { - struct rb_node *parent = &insert_point->rb; - struct rb_node **link = &parent; + struct rb_node *parent; + struct rb_node **link = &rii->tn_root.rb_node; + struct jffs2_tmp_dnode_info *insert_point = NULL; while (*link) { parent = *link; @@ -359,6 +344,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, rb_link_node(&tn->rb, &insert_point->rb, link); rb_insert_color(&tn->rb, &rii->tn_root); } + /* If there's anything behind that overlaps us, note it */ this = tn_prev(tn); if (this) { @@ -457,7 +443,7 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, this = tn_last(&rii->tn_root); while (this) { dbg_readinode("tn %p ver %d range 0x%x-0x%x ov %d\n", this, this->version, this->fn->ofs, - this->fn->ofs+this->fn->size, this->overlapped); + this->fn->ofs+this->fn->size, this->overlapped); this = tn_prev(this); } #endif @@ -483,7 +469,7 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, vers_next = tn_prev(this); eat_last(&ver_root, &this->rb); if (check_tn_node(c, this)) { - dbg_readinode("node ver %x, 0x%x-0x%x failed CRC\n", + dbg_readinode("node ver %d, 0x%x-0x%x failed CRC\n", this->version, this->fn->ofs, this->fn->ofs+this->fn->size); jffs2_kill_tn(c, this); @@ -496,7 +482,7 @@ static int 
jffs2_build_inode_fragtree(struct jffs2_sb_info *c, high_ver = this->version; rii->latest_ref = this->fn->raw; } - dbg_readinode("Add %p (v %x, 0x%x-0x%x, ov %d) to fragtree\n", + dbg_readinode("Add %p (v %d, 0x%x-0x%x, ov %d) to fragtree\n", this, this->version, this->fn->ofs, this->fn->ofs+this->fn->size, this->overlapped); @@ -585,8 +571,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r * Helper function for jffs2_get_inode_nodes(). * It is called every time an directory entry node is found. * - * Returns: 0 on succes; - * 1 if the node should be marked obsolete; + * Returns: 0 on success; * negative error code on failure. */ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, @@ -693,8 +678,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r * Helper function for jffs2_get_inode_nodes(). * It is called every time an inode node is found. * - * Returns: 0 on success; - * 1 if the node should be marked obsolete; + * Returns: 0 on success (possibly after marking a bad node obsolete); * negative error code on failure. */ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, @@ -703,7 +687,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref { struct jffs2_tmp_dnode_info *tn; uint32_t len, csize; - int ret = 1; + int ret = 0; uint32_t crc; /* Obsoleted. This cannot happen, surely? 
dwmw2 20020308 */ @@ -732,8 +716,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref /* Sanity checks */ if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) || unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) { - JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref)); - jffs2_dbg_dump_node(c, ref_offset(ref)); + JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref)); + jffs2_dbg_dump_node(c, ref_offset(ref)); + jffs2_mark_node_obsolete(c, ref); goto free_out; } @@ -788,6 +773,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) { JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n", ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc)); + jffs2_mark_node_obsolete(c, ref); goto free_out; } @@ -850,7 +836,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref return ret; } #ifdef JFFS2_DBG_READINODE_MESSAGES - dbg_readinode("After adding ver %d:\n", tn->version); + dbg_readinode("After adding ver %d:\n", je32_to_cpu(rd->version)); tn = tn_first(&rii->tn_root); while (tn) { dbg_readinode("%p: v %d r 0x%x-0x%x ov %d\n", @@ -867,7 +853,6 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref * It is called every time an unknown node is found. * * Returns: 0 on success; - * 1 if the node should be marked obsolete; * negative error code on failure. 
*/ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un) @@ -1064,7 +1049,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf case JFFS2_NODETYPE_DIRENT: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent) && + len < sizeof(struct jffs2_raw_dirent)) { err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf); if (unlikely(err)) goto free_out; @@ -1078,7 +1064,8 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf case JFFS2_NODETYPE_INODE: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode) && + len < sizeof(struct jffs2_raw_inode)) { err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf); if (unlikely(err)) goto free_out; @@ -1091,17 +1078,15 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf break; default: - if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) { + if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node) && + len < sizeof(struct jffs2_unknown_node)) { err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf); if (unlikely(err)) goto free_out; } err = read_unknown(c, ref, &node->u); - if (err == 1) { - jffs2_mark_node_obsolete(c, ref); - break; - } else if (unlikely(err)) + if (unlikely(err)) goto free_out; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 45368f8bbe7..e220d3bd610 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -19,7 +19,7 @@ #include <linux/mount.h> #include <linux/jffs2.h> #include <linux/pagemap.h> -#include <linux/mtd/mtd.h> +#include <linux/mtd/super.h> #include <linux/ctype.h> #include <linux/namei.h> #include "compr.h" @@ -47,10 +47,8 @@ static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned l { struct jffs2_inode_info *ei = (struct 
jffs2_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->sem); - inode_init_once(&ei->vfs_inode); - } + init_MUTEX(&ei->sem); + inode_init_once(&ei->vfs_inode); } static int jffs2_sync_fs(struct super_block *sb, int wait) @@ -77,69 +75,27 @@ static const struct super_operations jffs2_super_operations = .sync_fs = jffs2_sync_fs, }; -static int jffs2_sb_compare(struct super_block *sb, void *data) -{ - struct jffs2_sb_info *p = data; - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - - /* The superblocks are considered to be equivalent if the underlying MTD - device is the same one */ - if (c->mtd == p->mtd) { - D1(printk(KERN_DEBUG "jffs2_sb_compare: match on device %d (\"%s\")\n", p->mtd->index, p->mtd->name)); - return 1; - } else { - D1(printk(KERN_DEBUG "jffs2_sb_compare: No match, device %d (\"%s\"), device %d (\"%s\")\n", - c->mtd->index, c->mtd->name, p->mtd->index, p->mtd->name)); - return 0; - } -} - -static int jffs2_sb_set(struct super_block *sb, void *data) -{ - struct jffs2_sb_info *p = data; - - /* For persistence of NFS exports etc. 
we use the same s_dev - each time we mount the device, don't just use an anonymous - device */ - sb->s_fs_info = p; - p->os_priv = sb; - sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, p->mtd->index); - - return 0; -} - -static int jffs2_get_sb_mtd(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct mtd_info *mtd, - struct vfsmount *mnt) +/* + * fill in the superblock + */ +static int jffs2_fill_super(struct super_block *sb, void *data, int silent) { - struct super_block *sb; struct jffs2_sb_info *c; - int ret; + + D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" + " New superblock for device %d (\"%s\")\n", + sb->s_mtd->index, sb->s_mtd->name)); c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; - c->mtd = mtd; - sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); - - if (IS_ERR(sb)) - goto out_error; - - if (sb->s_root) { - /* New mountpoint for JFFS2 which is already mounted */ - D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n", - mtd->index, mtd->name)); - ret = simple_set_mnt(mnt, sb); - goto out_put; - } + c->mtd = sb->s_mtd; + c->os_priv = sb; + sb->s_fs_info = c; - D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n", - mtd->index, mtd->name)); - - /* Initialize JFFS2 superblock locks, the further initialization will be - * done later */ + /* Initialize JFFS2 superblock locks, the further initialization will + * be done later */ init_MUTEX(&c->alloc_sem); init_MUTEX(&c->erase_free_sem); init_waitqueue_head(&c->erase_wait); @@ -148,133 +104,20 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type, spin_lock_init(&c->inocache_lock); sb->s_op = &jffs2_super_operations; - sb->s_flags = flags | MS_NOATIME; + sb->s_flags = sb->s_flags | MS_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - ret = jffs2_do_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); - - if (ret) { - /* Failure case... 
*/ - up_write(&sb->s_umount); - deactivate_super(sb); - return ret; - } - - sb->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, sb); - -out_error: - ret = PTR_ERR(sb); - out_put: - kfree(c); - put_mtd_device(mtd); - - return ret; -} - -static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, int mtdnr, - struct vfsmount *mnt) -{ - struct mtd_info *mtd; - - mtd = get_mtd_device(NULL, mtdnr); - if (IS_ERR(mtd)) { - D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); - return PTR_ERR(mtd); - } - - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); + return jffs2_do_fill_super(sb, data, silent); } static int jffs2_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - int err; - struct nameidata nd; - int mtdnr; - - if (!dev_name) - return -EINVAL; - - D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name)); - - /* The preferred way of mounting in future; especially when - CONFIG_BLK_DEV is implemented - we specify the underlying - MTD device by number or by name, so that we don't require - block device support to be present in the kernel. */ - - /* FIXME: How to do the root fs this way? 
*/ - - if (dev_name[0] == 'm' && dev_name[1] == 't' && dev_name[2] == 'd') { - /* Probably mounting without the blkdev crap */ - if (dev_name[3] == ':') { - struct mtd_info *mtd; - - /* Mount by MTD device name */ - D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd:%%s, name \"%s\"\n", dev_name+4)); - for (mtdnr = 0; mtdnr < MAX_MTD_DEVICES; mtdnr++) { - mtd = get_mtd_device(NULL, mtdnr); - if (!IS_ERR(mtd)) { - if (!strcmp(mtd->name, dev_name+4)) - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); - put_mtd_device(mtd); - } - } - printk(KERN_NOTICE "jffs2_get_sb(): MTD device with name \"%s\" not found.\n", dev_name+4); - } else if (isdigit(dev_name[3])) { - /* Mount by MTD device number name */ - char *endptr; - - mtdnr = simple_strtoul(dev_name+3, &endptr, 0); - if (!*endptr) { - /* It was a valid number */ - D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr)); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); - } - } - } - - /* Try the old way - the hack where we allowed users to mount - /dev/mtdblock$(n) but didn't actually _use_ the blkdev */ - - err = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); - - D1(printk(KERN_DEBUG "jffs2_get_sb(): path_lookup() returned %d, inode %p\n", - err, nd.dentry->d_inode)); - - if (err) - return err; - - err = -EINVAL; - - if (!S_ISBLK(nd.dentry->d_inode->i_mode)) - goto out; - - if (nd.mnt->mnt_flags & MNT_NODEV) { - err = -EACCES; - goto out; - } - - if (imajor(nd.dentry->d_inode) != MTD_BLOCK_MAJOR) { - if (!(flags & MS_SILENT)) - printk(KERN_NOTICE "Attempt to mount non-MTD device \"%s\" as JFFS2\n", - dev_name); - goto out; - } - - mtdnr = iminor(nd.dentry->d_inode); - path_release(&nd); - - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); - -out: - path_release(&nd); - return err; + return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, + mnt); } static void jffs2_put_super (struct super_block *sb) @@ -309,8 +152,7 @@ static void 
jffs2_kill_sb(struct super_block *sb) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); if (!(sb->s_flags & MS_RDONLY)) jffs2_stop_garbage_collect_thread(c); - generic_shutdown_super(sb); - put_mtd_device(c->mtd); + kill_mtd_super(sb); kfree(c); } diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index c556e85a565..91d1d0f1c66 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -637,7 +637,10 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) memset(c->wbuf,0xff,c->wbuf_pagesize); /* adjust write buffer offset, else we get a non contiguous write bug */ - c->wbuf_ofs += c->wbuf_pagesize; + if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) + c->wbuf_ofs += c->wbuf_pagesize; + else + c->wbuf_ofs = 0xffffffff; c->wbuf_len = 0; return 0; } diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 78fc08893a6..e48665984cb 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -754,6 +754,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) list_del(&xd->xindex); jffs2_free_xattr_datum(xd); } + list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) { + list_del(&xd->xindex); + jffs2_free_xattr_datum(xd); + } } #define XREF_TMPHASH_SIZE (128) @@ -825,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) ref->xd and ref->ic are not valid yet. 
*/ xd = jffs2_find_xattr_datum(c, ref->xid); ic = jffs2_get_ino_cache(c, ref->ino); - if (!xd || !ic) { + if (!xd || !ic || !ic->nlink) { dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", ref->ino, ref->xid, ref->xseqno); ref->xseqno |= XREF_DELETE_MARKER; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index e285022f006..3467dde27e5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -55,7 +55,6 @@ void jfs_read_inode(struct inode *inode) inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } - jfs_set_inode_flags(inode); } /* diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index ed814b1ff4d..fe063af6fd2 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -59,6 +59,7 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, switch (cmd) { case JFS_IOC_GETFLAGS: + jfs_get_inode_flags(jfs_inode); flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; flags = jfs_map_ext2(flags, 0); return put_user(flags, (int __user *) arg); @@ -78,6 +79,7 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~JFS_DIRSYNC_FL; + jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; /* diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 82b0544bd76..f3b1ebb2228 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1507,7 +1507,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) if (l2nb < budmin) { /* search the lower level dmap control pages to get - * the starting block number of the the dmap that + * the starting block number of the dmap that * contains or starts off the free space. 
*/ if ((rc = diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index aa5124b643b..c6530227cda 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -386,7 +386,7 @@ int diRead(struct inode *ip) return -EIO; } - /* locate the the disk inode requested */ + /* locate the disk inode requested */ dp = (struct dinode *) mp->data; dp += rel_inode; @@ -1407,7 +1407,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) inum = pip->i_ino + 1; ino = inum & (INOSPERIAG - 1); - /* back off the the hint if it is outside of the iag */ + /* back off the hint if it is outside of the iag */ if (ino == 0) inum = pip->i_ino; @@ -3078,6 +3078,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); + jfs_set_inode_flags(ip); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; if (sbi->umask != -1) { @@ -3174,6 +3175,7 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) dip->di_gid = cpu_to_le32(ip->i_gid); else dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); + jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 4c67ed97682..ed6574bee51 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -45,6 +45,24 @@ void jfs_set_inode_flags(struct inode *inode) inode->i_flags |= S_SYNC; } +void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) +{ + unsigned int flags = jfs_ip->vfs_inode.i_flags; + + jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | + JFS_DIRSYNC_FL | JFS_SYNC_FL); + if (flags & S_IMMUTABLE) + jfs_ip->mode2 |= JFS_IMMUTABLE_FL; + if (flags & S_APPEND) + jfs_ip->mode2 |= JFS_APPEND_FL; + if (flags & S_NOATIME) + jfs_ip->mode2 |= JFS_NOATIME_FL; + if (flags & S_DIRSYNC) + jfs_ip->mode2 |= JFS_DIRSYNC_FL; + if (flags & S_SYNC) + jfs_ip->mode2 |= JFS_SYNC_FL; +} + /* * NAME: ialloc() * diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 6802837f757..2374b595f2e 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -31,6 +31,7 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern void jfs_get_inode_flags(struct jfs_inode_info *); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index df48ece4b7a..ecf04882265 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h @@ -45,7 +45,7 @@ do { \ io_schedule(); \ lock_cmd; \ } \ - current->state = TASK_RUNNING; \ + __set_current_state(TASK_RUNNING); \ remove_wait_queue(&wq, &__wait); \ } while (0) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 5065baa530b..44a2f33cb98 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -62,7 +62,6 @@ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/interrupt.h> -#include <linux/smp_lock.h> #include <linux/completion.h> #include 
<linux/kthread.h> #include <linux/buffer_head.h> /* for sync_blockdev() */ @@ -1590,7 +1589,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) set_current_state(TASK_UNINTERRUPTIBLE); LOGGC_UNLOCK(log); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); LOGGC_LOCK(log); remove_wait_queue(&target->gcwait, &__wait); } @@ -1961,7 +1960,7 @@ static void lbmfree(struct lbuf * bp) /* * NAME: lbmRedrive * - * FUNCTION: add a log buffer to the the log redrive list + * FUNCTION: add a log buffer to the log redrive list * * PARAMETER: * bp - log buffer @@ -2354,14 +2353,15 @@ int jfsIOWait(void *arg) lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } - spin_unlock_irq(&log_redrive_lock); if (freezing(current)) { + spin_unlock_irq(&log_redrive_lock); refrigerator(); } else { set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irq(&log_redrive_lock); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 6b3acb0b578..43d4f69afbe 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -184,16 +184,14 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); - } + mp->lid = 0; + mp->lsn = 0; + mp->flag = 0; + mp->data = NULL; + mp->clsn = 0; + mp->log = NULL; + set_bit(META_free, &mp->flag); + init_waitqueue_head(&mp->wait); } static inline struct metapage *alloc_metapage(gfp_t gfp_mask) diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 03893acbfda..25430d0b0d5 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -44,7 +44,6 @@ #include <linux/fs.h> #include <linux/vmalloc.h> -#include <linux/smp_lock.h> 
#include <linux/completion.h> #include <linux/freezer.h> #include <linux/module.h> @@ -136,7 +135,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) set_current_state(TASK_UNINTERRUPTIBLE); TXN_UNLOCK(); io_schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(event, &wait); } @@ -2798,7 +2797,7 @@ int jfs_lazycommit(void *arg) set_current_state(TASK_INTERRUPTIBLE); LAZY_UNLOCK(flags); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&jfs_commit_thread_wait, &wq); } } while (!kthread_should_stop()); @@ -2990,7 +2989,7 @@ int jfs_sync(void *arg) set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ea9dc3e65dc..20e4ac1c79a 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -752,20 +752,18 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); - INIT_LIST_HEAD(&jfs_ip->anon_inode_list); - init_rwsem(&jfs_ip->rdwrlock); - mutex_init(&jfs_ip->commit_mutex); - init_rwsem(&jfs_ip->xattr_sem); - spin_lock_init(&jfs_ip->ag_lock); - jfs_ip->active_ag = -1; + memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); + INIT_LIST_HEAD(&jfs_ip->anon_inode_list); + init_rwsem(&jfs_ip->rdwrlock); + mutex_init(&jfs_ip->commit_mutex); + init_rwsem(&jfs_ip->xattr_sem); + spin_lock_init(&jfs_ip->ag_lock); + jfs_ip->active_ag = -1; #ifdef CONFIG_JFS_POSIX_ACL - jfs_ip->i_acl = JFS_ACL_NOT_CACHED; - jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; + jfs_ip->i_acl = JFS_ACL_NOT_CACHED; + jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; #endif - inode_init_once(&jfs_ip->vfs_inode); - } + inode_init_once(&jfs_ip->vfs_inode); } static int __init 
init_jfs_fs(void) diff --git a/fs/libfs.c b/fs/libfs.c index d93842d3c0a..5294de1f40c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -159,7 +159,10 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) continue; spin_unlock(&dcache_lock); - if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) + if (filldir(dirent, next->d_name.name, + next->d_name.len, filp->f_pos, + next->d_inode->i_ino, + dt_type(next->d_inode)) < 0) return 0; spin_lock(&dcache_lock); /* next is still alive */ @@ -220,6 +223,12 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, root = new_inode(s); if (!root) goto Enomem; + /* + * since this is the first inode, make it number 1. New inodes created + * after this must take care not to collide with it (by passing + * max_reserved of 1 to iunique). + */ + root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_uid = root->i_gid = 0; root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; @@ -360,6 +369,11 @@ int simple_commit_write(struct file *file, struct page *page, return 0; } +/* + * the inodes created here are not hashed. If you use iunique to generate + * unique inode values later for this filesystem, then you must take care + * to pass it an appropriate max_reserved value to avoid collisions. 
+ */ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) { struct inode *inode; @@ -376,6 +390,11 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files inode = new_inode(s); if (!inode) return -ENOMEM; + /* + * because the root inode is 1, the files array must not contain an + * entry at index 1 + */ + inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; inode->i_uid = inode->i_gid = 0; inode->i_blocks = 0; @@ -391,6 +410,13 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files for (i = 0; !files->name || files->name[0]; i++, files++) { if (!files->name) continue; + + /* warn if it tries to conflict with the root inode */ + if (unlikely(i == 1)) + printk(KERN_WARNING "%s: %s passed in a files array" + "with an index of 1!\n", __func__, + s->s_type->name); + dentry = d_alloc_name(root, files->name); if (!dentry) goto out; diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index f4d45d4d835..d070b18e539 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -153,7 +153,7 @@ nlmclnt_recovery(struct nlm_host *host) if (!host->h_reclaiming++) { nlm_get_host(host); __module_get(THIS_MODULE); - if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) + if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0) module_put(THIS_MODULE); } } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a5c019e1a44..a10343bed16 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -12,7 +12,6 @@ #include <linux/fs.h> #include <linux/nfs_fs.h> #include <linux/utsname.h> -#include <linux/smp_lock.h> #include <linux/freezer.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> diff --git a/fs/lockd/host.c b/fs/lockd/host.c index ad21c0713ef..96070bff93f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -221,7 +221,7 @@ nlm_bind_host(struct nlm_host *host) host->h_nextrebind - jiffies); } } else { - unsigned long increment = nlmsvc_timeout * HZ; + unsigned long 
increment = nlmsvc_timeout; struct rpc_timeout timeparms = { .to_initval = increment, .to_increment = increment, diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9702956d206..5316e307a49 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -586,10 +586,6 @@ static struct rpc_version nlm_version3 = { .procs = nlm_procedures, }; -#ifdef CONFIG_LOCKD_V4 -extern struct rpc_version nlm_version4; -#endif - static struct rpc_version * nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index ce1efdbe1b3..846fc1d639d 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -123,7 +123,8 @@ static __be32 * nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - __s64 len, start, end; + __u64 len, start; + __s64 end; if (!(p = xdr_decode_string_inplace(p, &lock->caller, &lock->len, NLM_MAXSTRLEN)) @@ -417,7 +418,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) if (resp->status == nlm_lck_denied) { struct file_lock *fl = &resp->lock.fl; u32 excl; - s64 start, end, len; + __u64 start, len; + __s64 end; memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); diff --git a/fs/locks.c b/fs/locks.c index 671a034dc99..431a8b871fc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -203,9 +203,6 @@ static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags) { struct file_lock *lock = (struct file_lock *) foo; - if (!(flags & SLAB_CTOR_CONSTRUCTOR)) - return; - locks_init_lock(lock); } @@ -669,7 +666,6 @@ posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; - fl->fl_type = F_UNLCK; lock_kernel(); for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { if (!IS_POSIX(cfl)) @@ -681,7 +677,8 @@ posix_test_lock(struct file *filp, struct file_lock *fl) __locks_copy_lock(fl, cfl); unlock_kernel(); return 1; - } + } else + fl->fl_type = F_UNLCK; unlock_kernel(); return 0; } @@ -1632,6 +1629,7 @@ static 
int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; + flock->l_type = fl->fl_type; return 0; } diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index c4a554df7b7..99a12f12776 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -15,6 +15,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/sched.h> static int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2f4d43a2a31..be4044614ac 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -73,8 +73,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct minix_inode_info *ei = (struct minix_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/mpage.c b/fs/mpage.c index 692a3e578fc..c1698f2291a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -284,11 +284,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } if (first_hole != blocks_per_page) { - char *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + (first_hole << blkbits), 0, - PAGE_CACHE_SIZE - (first_hole << blkbits)); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, first_hole << blkbits, + PAGE_CACHE_SIZE - (first_hole << blkbits), + KM_USER0); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -456,11 +454,18 @@ EXPORT_SYMBOL(mpage_readpage); * written, so it can intelligently allocate a suitably-sized BIO. For now, * just allocate full-size (16-page) BIOs. 
*/ -static struct bio * -__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, - sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc, - writepage_t writepage_fn) +struct mpage_data { + struct bio *bio; + sector_t last_block_in_bio; + get_block_t *get_block; + unsigned use_writepage; +}; + +static int __mpage_writepage(struct page *page, struct writeback_control *wbc, + void *data) { + struct mpage_data *mpd = data; + struct bio *bio = mpd->bio; struct address_space *mapping = page->mapping; struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -478,6 +483,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, int length; struct buffer_head map_bh; loff_t i_size = i_size_read(inode); + int ret = 0; if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -540,7 +546,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, map_bh.b_state = 0; map_bh.b_size = 1 << blkbits; - if (get_block(inode, block_in_file, &map_bh, 1)) + if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) unmap_underlying_metadata(map_bh.b_bdev, @@ -576,20 +582,17 @@ page_is_mapped: * written out to the file." */ unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); - char *kaddr; if (page->index > end_index || !offset) goto confused; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, + KM_USER0); } /* * This page will go to BIO. Do we need to send this BIO off first? 
*/ - if (bio && *last_block_in_bio != blocks[0] - 1) + if (bio && mpd->last_block_in_bio != blocks[0] - 1) bio = mpage_bio_submit(WRITE, bio); alloc_new: @@ -646,7 +649,7 @@ alloc_new: boundary_block, 1 << blkbits); } } else { - *last_block_in_bio = blocks[blocks_per_page - 1]; + mpd->last_block_in_bio = blocks[blocks_per_page - 1]; } goto out; @@ -654,23 +657,19 @@ confused: if (bio) bio = mpage_bio_submit(WRITE, bio); - if (writepage_fn) { - *ret = (*writepage_fn)(page, wbc); + if (mpd->use_writepage) { + ret = mapping->a_ops->writepage(page, wbc); } else { - *ret = -EAGAIN; + ret = -EAGAIN; goto out; } /* * The caller has a ref on the inode, so *mapping is stable */ - if (*ret) { - if (*ret == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else - set_bit(AS_EIO, &mapping->flags); - } + mapping_set_error(mapping, ret); out: - return bio; + mpd->bio = bio; + return ret; } /** @@ -693,127 +692,27 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. - * - * If you fix this you should check generic_writepages() also! 
*/ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { - struct backing_dev_info *bdi = mapping->backing_dev_info; - struct bio *bio = NULL; - sector_t last_block_in_bio = 0; - int ret = 0; - int done = 0; - int (*writepage)(struct page *page, struct writeback_control *wbc); - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - writepage = NULL; - if (get_block == NULL) - writepage = mapping->a_ops->writepage; - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - if (writepage) { - ret = (*writepage)(page, wbc); - if (ret) { - if (ret == -ENOSPC) - 
set_bit(AS_ENOSPC, - &mapping->flags); - else - set_bit(AS_EIO, - &mapping->flags); - } - } else { - bio = __mpage_writepage(bio, page, get_block, - &last_block_in_bio, &ret, wbc, - page->mapping->a_ops->writepage); - } - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) - unlock_page(page); - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - pagevec_release(&pvec); - cond_resched(); + int ret; + + if (!get_block) + ret = generic_writepages(mapping, wbc); + else { + struct mpage_data mpd = { + .bio = NULL, + .last_block_in_bio = 0, + .get_block = get_block, + .use_writepage = 1, + }; + + ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); + if (mpd.bio) + mpage_bio_submit(WRITE, mpd.bio); } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - if (bio) - mpage_bio_submit(WRITE, bio); return ret; } EXPORT_SYMBOL(mpage_writepages); @@ -821,15 +720,15 @@ EXPORT_SYMBOL(mpage_writepages); int mpage_writepage(struct page *page, get_block_t get_block, struct writeback_control *wbc) { - int ret = 0; - struct bio *bio; - sector_t last_block_in_bio = 0; - - bio = __mpage_writepage(NULL, page, get_block, - &last_block_in_bio, &ret, wbc, NULL); - if (bio) - mpage_bio_submit(WRITE, bio); - + struct mpage_data mpd = { + .bio = NULL, + .last_block_in_bio = 0, + .get_block = get_block, + .use_writepage = 0, + }; + int ret = __mpage_writepage(page, wbc, &mpd); + if (mpd.bio) + mpage_bio_submit(WRITE, mpd.bio); return ret; } EXPORT_SYMBOL(mpage_writepage); diff --git a/fs/namei.c b/fs/namei.c index 94b2f60aec2..5e2d98d10c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -22,7 +22,6 @@ #include <linux/quotaops.h> #include <linux/pagemap.h> 
#include <linux/fsnotify.h> -#include <linux/smp_lock.h> #include <linux/personality.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -1153,14 +1152,12 @@ static int fastcall do_path_lookup(int dfd, const char *name, fput_light(file, fput_needed); } - current->total_link_count = 0; - retval = link_path_walk(name, nd); + + retval = path_walk(name, nd); out: - if (likely(retval == 0)) { - if (unlikely(!audit_dummy_context() && nd && nd->dentry && + if (unlikely(!retval && !audit_dummy_context() && nd->dentry && nd->dentry->d_inode)) audit_inode(name, nd->dentry->d_inode); - } out_fail: return retval; @@ -1350,17 +1347,6 @@ struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int le return __lookup_hash_kern(&this, base, NULL); } -/* - * namei() - * - * is used by most simple commands to get the inode of a specified name. - * Open, link etc use their own routines, but this is enough for things - * like 'chmod' etc. - * - * namei exists in two versions: namei/lnamei. The only difference is - * that namei follows links, while lnamei does not. - * SMP-safe - */ int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, struct nameidata *nd) { @@ -1733,7 +1719,7 @@ do_last: * It already exists. */ mutex_unlock(&dir->d_inode->i_mutex); - audit_inode_update(path.dentry->d_inode); + audit_inode(pathname, path.dentry->d_inode); error = -EEXIST; if (flag & O_EXCL) diff --git a/fs/namespace.c b/fs/namespace.c index fd999cab7b5..b696e3a0d18 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -377,6 +377,10 @@ static int show_vfsmnt(struct seq_file *m, void *v) seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); seq_putc(m, ' '); mangle(m, mnt->mnt_sb->s_type->name); + if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) { + seq_putc(m, '.'); + mangle(m, mnt->mnt_sb->s_subtype); + } seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? 
" ro" : " rw"); for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { if (mnt->mnt_sb->s_flags & fs_infop->flag) @@ -495,7 +499,7 @@ void release_mounts(struct list_head *head) { struct vfsmount *mnt; while (!list_empty(head)) { - mnt = list_entry(head->next, struct vfsmount, mnt_hash); + mnt = list_first_entry(head, struct vfsmount, mnt_hash); list_del_init(&mnt->mnt_hash); if (mnt->mnt_parent != mnt) { struct dentry *dentry; @@ -882,6 +886,9 @@ static int do_change_type(struct nameidata *nd, int flag) int recurse = flag & MS_REC; int type = flag & ~MS_REC; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; @@ -1173,7 +1180,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou while (!list_empty(graveyard)) { LIST_HEAD(umounts); - mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire); + mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire); list_del_init(&mnt->mnt_expire); /* don't do anything if the namespace is dead - all the @@ -1441,10 +1448,9 @@ dput_out: * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. 
*/ -struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk, +static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct fs_struct *fs) { - struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns; struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; struct vfsmount *p, *q; @@ -1509,36 +1515,21 @@ struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk, return new_ns; } -int copy_mnt_ns(int flags, struct task_struct *tsk) +struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns, + struct fs_struct *new_fs) { - struct mnt_namespace *ns = tsk->nsproxy->mnt_ns; struct mnt_namespace *new_ns; - int err = 0; - - if (!ns) - return 0; + BUG_ON(!ns); get_mnt_ns(ns); if (!(flags & CLONE_NEWNS)) - return 0; + return ns; - if (!capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto out; - } - - new_ns = dup_mnt_ns(tsk, tsk->fs); - if (!new_ns) { - err = -ENOMEM; - goto out; - } + new_ns = dup_mnt_ns(ns, new_fs); - tsk->nsproxy->mnt_ns = new_ns; - -out: put_mnt_ns(ns); - return err; + return new_ns; } asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name, diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 6b1f6d27099..d3152f8d95c 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -17,7 +17,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/vmalloc.h> -#include <linux/smp_lock.h> +#include <linux/sched.h> #include <linux/ncp_fs.h> #include "ncplib_kernel.h" diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index c29f00ad495..cf06eb9f050 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -60,10 +60,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mutex_init(&ei->open_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->open_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git 
a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 8843a83d4ef..c67b4bdcf71 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -17,6 +17,7 @@ #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> +#include <linux/sched.h> #include <linux/ncp_fs.h> diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index db3d7919c60..c2bb14e053e 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -24,7 +24,7 @@ enum nfs4_callback_opnum { }; struct cb_compound_hdr_arg { - int taglen; + unsigned int taglen; const char *tag; unsigned int callback_ident; unsigned nops; @@ -32,7 +32,7 @@ struct cb_compound_hdr_arg { struct cb_compound_hdr_res { __be32 *status; - int taglen; + unsigned int taglen; const char *tag; __be32 *nops; }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5bd03b97002..881fa490092 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -12,7 +12,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -27,7 +27,6 @@ #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/nfs_idmap.h> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 841c99a9b11..7f37d1bea83 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -226,7 +226,7 @@ restart: spin_unlock(&clp->cl_lock); } -int nfs_do_expire_all_delegations(void *ptr) +static int nfs_do_expire_all_delegations(void *ptr) { struct nfs_client *clp = ptr; struct nfs_delegation *delegation; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 625d8e5fb39..c27258b5d3e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,12 +33,12 @@ #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/sched.h> #include "nfs4_fs.h" #include "delegation.h" #include "iostat.h" -#define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ 
static int nfs_opendir(struct inode *, struct file *); @@ -608,7 +608,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return res; } -loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); switch (origin) { @@ -634,7 +634,7 @@ out: * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ -int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -650,12 +650,15 @@ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) */ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { + unsigned long verf; + if (IS_ROOT(dentry)) return 1; - if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 - || nfs_attribute_timeout(dir)) + verf = (unsigned long)dentry->d_fsdata; + if (nfs_caches_unstable(dir) + || verf != NFS_I(dir)->cache_change_attribute) return 0; - return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); + return 1; } static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) @@ -665,8 +668,7 @@ static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) { - if (time_after(verf, (unsigned long)dentry->d_fsdata)) - nfs_set_verifier(dentry, verf); + nfs_set_verifier(dentry, verf); } /* @@ -765,6 +767,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; + /* Revalidate parent directory attribute cache */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + goto out_zap_parent; + 
if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -778,10 +784,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) goto out_bad; } - /* Revalidate parent directory attribute cache */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) - goto out_zap_parent; - /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, nd)) @@ -1360,11 +1362,6 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) atomic_read(&dentry->d_count)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); -#ifdef NFS_PARANOIA -if (!dentry->d_inode) -printk("NFS: silly-renaming %s/%s, negative dentry??\n", -dentry->d_parent->d_name.name, dentry->d_name.name); -#endif /* * We don't allow a dentry to be silly-renamed twice. */ @@ -1681,16 +1678,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode = NULL; /* instantiate the replacement target */ d_instantiate(new_dentry, NULL); - } else if (atomic_read(&new_dentry->d_count) > 1) { - /* dentry still busy? */ -#ifdef NFS_PARANOIA - printk("nfs_rename: target %s/%s busy, d_count=%d\n", - new_dentry->d_parent->d_name.name, - new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); -#endif + } else if (atomic_read(&new_dentry->d_count) > 1) + /* dentry still busy? 
*/ goto out; - } } else drop_nlink(new_inode); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 889de60f8a8..00eee87510f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -41,7 +41,6 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/kref.h> @@ -123,19 +122,25 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_direct_dirty_pages(struct page **pages, int npages) +static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) { - int i; + unsigned int npages; + unsigned int i; + + if (count == 0) + return; + pages += (pgbase >> PAGE_SHIFT); + npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { struct page *page = pages[i]; if (!PageCompound(page)) - set_page_dirty_lock(page); + set_page_dirty(page); } } -static void nfs_direct_release_pages(struct page **pages, int npages) +static void nfs_direct_release_pages(struct page **pages, unsigned int npages) { - int i; + unsigned int i; for (i = 0; i < npages; i++) page_cache_release(pages[i]); } @@ -163,7 +168,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return dreq; } -static void nfs_direct_req_release(struct kref *kref) +static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); @@ -172,6 +177,11 @@ static void nfs_direct_req_release(struct kref *kref) kmem_cache_free(nfs_direct_cachep, dreq); } +static void nfs_direct_req_release(struct nfs_direct_req *dreq) +{ + kref_put(&dreq->kref, nfs_direct_req_free); +} + /* * Collects and returns the final error value/byte-count. 
*/ @@ -191,7 +201,6 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) result = dreq->count; out: - kref_put(&dreq->kref, nfs_direct_req_release); return (ssize_t) result; } @@ -209,7 +218,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } complete_all(&dreq->completion); - kref_put(&dreq->kref, nfs_direct_req_release); + nfs_direct_req_release(dreq); } /* @@ -225,17 +234,18 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; - nfs_direct_dirty_pages(data->pagevec, data->npages); - nfs_direct_release_pages(data->pagevec, data->npages); - spin_lock(&dreq->lock); - - if (likely(task->tk_status >= 0)) - dreq->count += data->res.count; - else + if (unlikely(task->tk_status < 0)) { dreq->error = task->tk_status; - - spin_unlock(&dreq->lock); + spin_unlock(&dreq->lock); + } else { + dreq->count += data->res.count; + spin_unlock(&dreq->lock); + nfs_direct_dirty_pages(data->pagevec, + data->args.pgbase, + data->res.count); + } + nfs_direct_release_pages(data->pagevec, data->npages); if (put_dreq(dreq)) nfs_direct_complete(dreq); @@ -280,9 +290,12 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) { - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); + if (result < 0) { + nfs_readdata_release(data); + break; + } + if ((unsigned)result < data->npages) { + nfs_direct_release_pages(data->pagevec, result); nfs_readdata_release(data); break; } @@ -360,6 +373,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!result) result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); + nfs_direct_req_release(dreq); return result; } @@ -611,9 +625,12 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq,
unsigned l result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) { - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); + if (result < 0) { + nfs_writedata_release(data); + break; + } + if ((unsigned)result < data->npages) { + nfs_direct_release_pages(data->pagevec, result); nfs_writedata_release(data); break; } @@ -704,6 +721,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz if (!result) result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); + nfs_direct_req_release(dreq); return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5eaee6dd040..9eb8eb4e4a0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 6ef268f7c30..d1cbf0a0fbb 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -25,7 +25,6 @@ #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/nfs_idmap.h> @@ -42,7 +41,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_CLIENT -#define NFS_PARANOIA 1 /* * get an NFS2/NFS3 root dentry from the root filehandle diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 9d4a6b2d199..d11eb055265 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -272,7 +272,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&idmap->idmap_im_lock); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&idmap->idmap_wq, &wq); mutex_lock(&idmap->idmap_im_lock); @@ -333,7 +333,7 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&idmap->idmap_im_lock); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&idmap->idmap_wq, &wq); mutex_lock(&idmap->idmap_im_lock); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1e9a915d1fe..bd9f5a83659 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -48,7 +48,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_PARANOIA 1 static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); @@ -1075,10 +1074,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ -#ifdef NFS_PARANOIA printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); -#endif out_err: /* * No need to worry about unhashing the dentry, as the @@ -1167,21 +1164,19 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct nfs_inode *nfsi = (struct nfs_inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); - INIT_LIST_HEAD(&nfsi->open_files); - INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); - INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; - nfsi->ncommit = 0; - nfsi->npages = 0; - nfs4_init_once(nfsi); - } + inode_init_once(&nfsi->vfs_inode); + spin_lock_init(&nfsi->req_lock); + INIT_LIST_HEAD(&nfsi->dirty); + INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + 
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); + nfsi->ndirty = 0; + nfsi->ncommit = 0; + nfsi->npages = 0; + nfs4_init_once(nfsi); } static int __init nfs_init_inodecache(void) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index abd9f8b4894..cd3ca7b5d3d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -26,7 +26,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* #define NFS_PARANOIA 1 */ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7d0371e2bad..45268d6def2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -16,7 +16,6 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/nfs_mount.h> #include "iostat.h" diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d6a30e96578..648e0ac0f90 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -790,7 +790,7 @@ out: return -EACCES; } -int nfs4_recover_expired_lease(struct nfs_server *server) +static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; int ret; @@ -2748,7 +2748,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; int ret = errorcode; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index f5f4430fb2a..0505ca12403 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -43,7 +43,6 @@ * child task framework of the RPC layer? 
*/ -#include <linux/smp_lock.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/sunrpc/sched.h> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5fffbdfa971..8ed79d5c54f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -104,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b8c28f2380a..8003c91ccb9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -224,7 +224,8 @@ static int nfs4_stat_to_errno(int); encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 3) + op_decode_hdr_maxsz + 3 + \ + nfs4_fattr_maxsz) #define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_fsinfo_maxsz) @@ -645,10 +646,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(8+NFS4_STATEID_SIZE); WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); return 0; } @@ -792,17 +793,17 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITE64(nfs4_lock_length(args->fl)); WRITE32(args->new_lock_owner); if (args->new_lock_owner){ - RESERVE_SPACE(40); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); WRITE32(args->open_seqid->sequence->counter); - WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data)); + WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); WRITE64(args->lock_owner.clientid); WRITE32(4); WRITE32(args->lock_owner.id); } else { - RESERVE_SPACE(20); - 
WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data)); + RESERVE_SPACE(NFS4_STATEID_SIZE+4); + WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); } @@ -829,11 +830,11 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg { __be32 *p; - RESERVE_SPACE(44); + RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); WRITE32(OP_LOCKU); WRITE32(nfs4_lock_type(args->fl, 0)); WRITE32(args->seqid->sequence->counter); - WRITEMEM(args->stateid->data, sizeof(args->stateid->data)); + WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); @@ -965,9 +966,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc { __be32 *p; - RESERVE_SPACE(4+sizeof(stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); - WRITEMEM(stateid->data, sizeof(stateid->data)); + WRITEMEM(stateid->data, NFS4_STATEID_SIZE); encode_string(xdr, name->len, name->name); } @@ -995,9 +996,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); return 0; @@ -1007,9 +1008,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); WRITE32(OP_OPEN_DOWNGRADE); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; @@ -1044,12 +1045,12 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context nfs4_stateid stateid; 
__be32 *p; - RESERVE_SPACE(16); + RESERVE_SPACE(NFS4_STATEID_SIZE); if (ctx->state != NULL) { nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); - WRITEMEM(stateid.data, sizeof(stateid.data)); + WRITEMEM(stateid.data, NFS4_STATEID_SIZE); } else - WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); } static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) @@ -1078,10 +1079,10 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg int replen; __be32 *p; - RESERVE_SPACE(32+sizeof(nfs4_verifier)); + RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); WRITE32(OP_READDIR); WRITE64(readdir->cookie); - WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data)); + WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); @@ -1189,9 +1190,9 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) { __be32 *p; - RESERVE_SPACE(4+sizeof(zero_stateid.data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); - WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); RESERVE_SPACE(2*4); WRITE32(1); WRITE32(FATTR4_WORD0_ACL); @@ -1219,9 +1220,9 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * int status; __be32 *p; - RESERVE_SPACE(4+sizeof(arg->stateid.data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); if ((status = encode_attrs(xdr, arg->iap, server))) return status; @@ -1233,9 +1234,9 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien { __be32 *p; - RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); + RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); WRITE32(OP_SETCLIENTID); - WRITEMEM(setclientid->sc_verifier->data, 
sizeof(setclientid->sc_verifier->data)); + WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); RESERVE_SPACE(4); @@ -1252,10 +1253,10 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c { __be32 *p; - RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data)); + RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); WRITE32(OP_SETCLIENTID_CONFIRM); WRITE64(client_state->cl_clientid); - WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data)); + WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); return 0; } @@ -1283,10 +1284,10 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei { __be32 *p; - RESERVE_SPACE(20); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_DELEGRETURN); - WRITEMEM(stateid->data, sizeof(stateid->data)); + WRITEMEM(stateid->data, NFS4_STATEID_SIZE); return 0; } @@ -2079,9 +2080,11 @@ out: #define READ_BUF(nbytes) do { \ p = xdr_inline_decode(xdr, nbytes); \ - if (!p) { \ - printk(KERN_WARNING "%s: reply buffer overflowed in line %d.", \ - __FUNCTION__, __LINE__); \ + if (unlikely(!p)) { \ + printk(KERN_INFO "%s: prematurely hit end of receive" \ + " buffer\n", __FUNCTION__); \ + printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ + __FUNCTION__, xdr->p, nbytes, xdr->end); \ return -EIO; \ } \ } while (0) @@ -2491,7 +2494,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st int i; dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); for (i = loc->nservers; i < m; i++) { - int len; + unsigned int len; char *data; status = decode_opaque_inline(xdr, &len, &data); if (unlikely(status != 0)) @@ -2639,7 +2642,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 
struct nfs_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) { uint32_t len; __be32 *p; @@ -2664,7 +2667,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) { uint32_t len; __be32 *p; @@ -2894,8 +2897,8 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) status = decode_op_hdr(xdr, OP_CLOSE); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3183,8 +3186,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); } else if (status == -NFS4ERR_DENIED) return decode_lock_denied(xdr, NULL); return status; @@ -3206,8 +3209,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); } return status; } @@ -3248,8 +3251,8 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) res->delegation_type = 0; return 0; } - READ_BUF(20); - COPYMEM(res->delegation.data, sizeof(res->delegation.data)); + READ_BUF(NFS4_STATEID_SIZE+4); + COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); READ32(res->do_recall); switch 
(delegation_type) { case NFS4_OPEN_DELEGATE_READ: @@ -3272,8 +3275,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) status = decode_op_hdr(xdr, OP_OPEN); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); decode_change_info(xdr, &res->cinfo); @@ -3299,8 +3302,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3312,8 +3315,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3587,9 +3590,9 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) } READ32(nfserr); if (nfserr == NFS_OK) { - READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ_BUF(8 + NFS4_VERIFIER_SIZE); READ64(clp->cl_clientid); - COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); + COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); } else if (nfserr == NFSERR_CLID_INUSE) { uint32_t len; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 388950118f5..c5bb51a29e8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/file.h> +#include <linux/sched.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs3.h> #include <linux/nfs4.h> @@ -20,8 +21,6 @@ #include "internal.h" -#define NFS_PARANOIA 1 - static struct kmem_cache *nfs_page_cachep; static 
inline struct nfs_page * @@ -167,11 +166,6 @@ nfs_release_request(struct nfs_page *req) if (!atomic_dec_and_test(&req->wb_count)) return; -#ifdef NFS_PARANOIA - BUG_ON (!list_empty(&req->wb_list)); - BUG_ON (NFS_WBACK_BUSY(req)); -#endif - /* Release struct file or cached credential */ nfs_clear_request(req); put_nfs_open_context(req->wb_context); @@ -361,6 +355,26 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) nfs_pageio_doio(desc); } +/** + * nfs_pageio_cond_complete - Conditional I/O completion + * @desc: pointer to io descriptor + * @index: page index + * + * It is important to ensure that processes don't try to take locks + * on non-contiguous ranges of pages as that might deadlock. This + * function should be called before attempting to wait on a locked + * nfs_page. It will complete the I/O if the page index 'index' + * is not contiguous with the existing list of pages in 'desc'. + */ +void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) +{ + if (!list_empty(&desc->pg_list)) { + struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); + if (index != prev->wb_index + 1) + nfs_pageio_doio(desc); + } +} + #define NFS_SCAN_MAXENTRIES 16 /** * nfs_scan_list - Scan a list for matching requests diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 1dcf56de948..7be0ee2782c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -43,7 +43,6 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9a55807b2a7..7bd7cb95c03 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -79,7 +79,7 @@ void nfs_readdata_release(void *data) static int nfs_return_empty_page(struct page *page) { - memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); SetPageUptodate(page); unlock_page(page); return 0; @@ -103,10 +103,10 @@ static void 
nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pglen = PAGE_CACHE_SIZE - base; for (;;) { if (remainder <= pglen) { - memclear_highpage_flush(*pages, base, remainder); + zero_user_page(*pages, base, remainder, KM_USER0); break; } - memclear_highpage_flush(*pages, base, pglen); + zero_user_page(*pages, base, pglen, KM_USER0); pages++; remainder -= pglen; pglen = PAGE_CACHE_SIZE; @@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) @@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page) return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index bc2821331c2..83e865a16ad 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -22,7 +22,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/namei.h> /* Symlink caching in the page cache is even more simplistic diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5d44b8bd107..af344a158e0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -21,7 +21,6 @@ #include <linux/backing-dev.h> #include <asm/uaccess.h> -#include <linux/smp_lock.h> #include "delegation.h" #include "internal.h" @@ -59,7 +58,7 @@ struct nfs_write_data *nfs_commit_alloc(void) return p; } -void nfs_commit_rcu_free(struct rcu_head *head) +static void nfs_commit_rcu_free(struct rcu_head *head) { struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) @@ -169,7 
+168,7 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int if (count != nfs_page_length(page)) return; if (count != PAGE_CACHE_SIZE) - memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); + zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0); SetPageUptodate(page); } @@ -225,7 +224,7 @@ static int nfs_set_page_writeback(struct page *page) struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); - if (atomic_inc_return(&nfss->writeback) > + if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) set_bdi_congested(&nfss->backing_dev_info, WRITE); } @@ -238,7 +237,7 @@ static void nfs_end_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); - if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { + if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { clear_bdi_congested(&nfss->backing_dev_info, WRITE); congestion_end(WRITE); } @@ -274,8 +273,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, * request as dirty (in which case we don't care). */ spin_unlock(req_lock); - /* Prevent deadlock! 
*/ - nfs_pageio_complete(pgio); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) @@ -322,6 +319,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc pgio = &mypgio; } + nfs_pageio_cond_complete(pgio, page->index); + err = nfs_page_async_flush(pgio, page); if (err <= 0) goto out; @@ -330,6 +329,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc if (!offset) goto out; + nfs_pageio_cond_complete(pgio, page->index); + ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); if (ctx == NULL) { err = -EBADF; @@ -923,7 +924,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i return 0; out_bad: while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); + req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index ce341dc76d5..9b118ee2019 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -11,4 +11,3 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o -nfsd-objs := $(nfsd-y) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 6f24768272a..79bd03b8bbf 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -469,6 +469,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) nd.dentry = NULL; exp.ex_path = NULL; + /* fs locations */ + exp.ex_fslocs.locations = NULL; + exp.ex_fslocs.locations_count = 0; + exp.ex_fslocs.migrated = 0; + + exp.ex_uuid = NULL; + if (mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; @@ -509,13 +516,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (exp.h.expiry_time == 0) goto out; - /* fs locations */ - exp.ex_fslocs.locations = NULL; - exp.ex_fslocs.locations_count = 
0; - exp.ex_fslocs.migrated = 0; - - exp.ex_uuid = NULL; - /* flags */ err = get_int(&mesg, &an_int); if (err == -ENOENT) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7f5bad0393b..eac82830bfd 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -177,7 +177,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, if (max_blocksize < resp->count) resp->count = max_blocksize; - svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_read(rqstp, &resp->fh, NULL, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7e4bb0af24d..10f6e7dcf63 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -239,7 +239,7 @@ static __be32 * encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; - if (dentry && dentry->d_inode != NULL) { + if (dentry && dentry->d_inode) { int err; struct kstat stat; @@ -300,9 +300,9 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_sattr3(p, &args->attrs))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = decode_sattr3(p, &args->attrs); if ((args->check_guard = ntohl(*p++)) != 0) { struct timespec time; @@ -343,9 +343,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, int v,pn; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh)) - || !(p = xdr_decode_hyper(p, &args->offset))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = xdr_decode_hyper(p, &args->offset); len = args->count = ntohl(*p++); @@ -369,28 +369,44 @@ int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeargs *args) { - unsigned int len, v, hdr; + unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh)) 
- || !(p = xdr_decode_hyper(p, &args->offset))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + /* + * The count must equal the amount of data passed. + */ + if (args->count != args->len) + return 0; + /* + * Check to make sure that we got the right number of + * bytes. + */ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - if (rqstp->rq_arg.len < hdr || - rqstp->rq_arg.len - hdr < len) + dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + - hdr; + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + */ + if (dlen != XDR_QUADLEN(len)*4) return 0; + if (args->count > max_blocksize) { + args->count = max_blocksize; + len = args->len = max_blocksize; + } rqstp->rq_vec[0].iov_base = (void*)p; rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; - - if (len > max_blocksize) - len = max_blocksize; - v= 0; + v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; v++; @@ -398,9 +414,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, rqstp->rq_vec[v].iov_len = PAGE_SIZE; } rqstp->rq_vec[v].iov_len = len; - args->vlen = v+1; - - return args->count == args->len && rqstp->rq_vec[0].iov_len > 0; + args->vlen = v + 1; + return 1; } int @@ -414,8 +429,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, switch (args->createmode = ntohl(*p++)) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - if (!(p = decode_sattr3(p, &args->attrs))) - return 0; + p = decode_sattr3(p, &args->attrs); break; case NFS3_CREATE_EXCLUSIVE: args->verf = p; @@ -431,10 +445,10 @@ int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_createargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len)) - || !(p = 
decode_sattr3(p, &args->attrs))) + if (!(p = decode_fh(p, &args->fh)) || + !(p = decode_filename(p, &args->name, &args->len))) return 0; + p = decode_sattr3(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -448,11 +462,12 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, char *old, *new; struct kvec *vec; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_sattr3(p, &args->attrs)) + if (!(p = decode_fh(p, &args->ffh)) || + !(p = decode_filename(p, &args->fname, &args->flen)) ) return 0; + p = decode_sattr3(p, &args->attrs); + /* now decode the pathname, which might be larger than the first page. * As we have to check for nul's anyway, we copy it into a new page * This page appears in the rq_res.pages list, but as pages_len is always @@ -502,10 +517,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p, args->ftype = ntohl(*p++); if (args->ftype == NF3BLK || args->ftype == NF3CHR - || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { - if (!(p = decode_sattr3(p, &args->attrs))) - return 0; - } + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) + p = decode_sattr3(p, &args->attrs); if (args->ftype == NF3BLK || args->ftype == NF3CHR) { args->major = ntohl(*p++); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 673a53c014a..cc3b7badd48 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -137,7 +137,6 @@ struct ace_container { static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); struct nfs4_acl * nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, @@ -785,21 +784,6 @@ nfs4_acl_new(int n) return acl; } -void -nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, - int whotype, uid_t who) -{ - struct nfs4_ace *ace = acl->aces + acl->naces; - - ace->type = type; - ace->flag 
= flag; - ace->access_mask = access_mask; - ace->whotype = whotype; - ace->who = who; - - acl->naces++; -} - static struct { char *string; int stringlen; @@ -851,6 +835,5 @@ nfs4_acl_write_who(int who, char *p) } EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_add_ace); EXPORT_SYMBOL(nfs4_acl_get_whotype); EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 32ffea033c7..864090edc28 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -38,6 +38,7 @@ #include <linux/inet.h> #include <linux/errno.h> #include <linux/delay.h> +#include <linux/sched.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e4a83d727af..45aa21ce678 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -46,7 +46,6 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> -#include <linux/smp_lock.h> #include <linux/sunrpc/cache.h> #include <linux/nfsd_idmap.h> #include <linux/list.h> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c7774e3a946..ebd03cc0747 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -45,7 +45,7 @@ #include <asm/uaccess.h> #include <asm/scatterlist.h> #include <linux/crypto.h> - +#include <linux/sched.h> #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 678f3be88ac..3cc8ce422ab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1326,8 +1326,6 @@ do_recall(void *__dp) { struct nfs4_delegation *dp = __dp; - daemonize("nfsv4-recall"); - nfsd4_cb_recall(dp); return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5d090f11f2b..15809dfd88a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -44,7 +44,6 @@ #include <linux/param.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/vfs.h> diff --git 
a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8d995bcef80..6ca2d24fc21 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -10,7 +10,6 @@ */ #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/unistd.h> #include <linux/string.h> @@ -324,7 +323,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, * */ - u8 version = 1; + u8 version; u8 fsid_type = 0; struct inode * inode = dentry->d_inode; struct dentry *parent = dentry->d_parent; @@ -342,15 +341,59 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, * the reference filehandle (if it is in the same export) * or the export options. */ + retry: + version = 1; if (ref_fh && ref_fh->fh_export == exp) { version = ref_fh->fh_handle.fh_version; - if (version == 0xca) + fsid_type = ref_fh->fh_handle.fh_fsid_type; + + if (ref_fh == fhp) + fh_put(ref_fh); + ref_fh = NULL; + + switch (version) { + case 0xca: fsid_type = FSID_DEV; - else - fsid_type = ref_fh->fh_handle.fh_fsid_type; - /* We know this version/type works for this export - * so there is no need for further checks. + break; + case 1: + break; + default: + goto retry; + } + + /* Need to check that this type works for this + * export point. As the fsid -> filesystem mapping + * was guided by user-space, there is no guarantee + * that the filesystem actually supports that fsid + * type. If it doesn't we loop around again without + * ref_fh set. */ + switch(fsid_type) { + case FSID_DEV: + if (!old_valid_dev(ex_dev)) + goto retry; + /* FALL THROUGH */ + case FSID_MAJOR_MINOR: + case FSID_ENCODE_DEV: + if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags + & FS_REQUIRES_DEV)) + goto retry; + break; + case FSID_NUM: + if (! 
(exp->ex_flags & NFSEXP_FSID)) + goto retry; + break; + case FSID_UUID8: + case FSID_UUID16: + if (!root_export) + goto retry; + /* fall through */ + case FSID_UUID4_INUM: + case FSID_UUID16_INUM: + if (exp->ex_uuid == NULL) + goto retry; + break; + } } else if (exp->ex_uuid) { if (fhp->fh_maxsize >= 64) { if (root_export) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 5cc2eec981b..b2c7147aa92 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -155,7 +155,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, argp->count); argp->count = NFSSVC_MAXBLKSIZE_V2; } - svc_reserve(rqstp, (19<<2) + argp->count + 4); + svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d7759ce6ed9..ff55950efb4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -9,7 +9,7 @@ */ #include <linux/module.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/errno.h> #include <linux/nfs.h> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 0c24b9e24fe..cb3e7fadb77 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -231,9 +231,10 @@ int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_sattr(p, &args->attrs))) + p = decode_fh(p, &args->fh); + if (!p) return 0; + p = decode_sattr(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -284,8 +285,9 @@ int nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { - unsigned int len; + unsigned int len, hdr, dlen; int v; + if (!(p = decode_fh(p, &args->fh))) return 0; @@ -293,11 +295,30 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->offset = ntohl(*p++); /* offset */ p++; /* totalcount */ len = args->len = ntohl(*p++); - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = 
rqstp->rq_arg.head[0].iov_len - - (((void*)p) - rqstp->rq_arg.head[0].iov_base); + /* + * The protocol specifies a maximum of 8192 bytes. + */ if (len > NFSSVC_MAXBLKSIZE_V2) - len = NFSSVC_MAXBLKSIZE_V2; + return 0; + + /* + * Check to make sure that we got the right number of + * bytes. + */ + hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; + dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + - hdr; + + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + */ + if (dlen != XDR_QUADLEN(len)*4) + return 0; + + rqstp->rq_vec[0].iov_base = (void*)p; + rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -306,18 +327,18 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, rqstp->rq_vec[v].iov_len = PAGE_SIZE; } rqstp->rq_vec[v].iov_len = len; - args->vlen = v+1; - return rqstp->rq_vec[0].iov_len > 0; + args->vlen = v + 1; + return 1; } int nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_createargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len)) - || !(p = decode_sattr(p, &args->attrs))) + if ( !(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) return 0; + p = decode_sattr(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -361,11 +382,11 @@ int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_symlinkargs *args) { - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_pathname(p, &args->tname, &args->tlen)) - || !(p = decode_sattr(p, &args->attrs))) + if ( !(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_pathname(p, &args->tname, &args->tlen))) return 0; + p = decode_sattr(p, 
&args->attrs); return xdr_argsize_check(rqstp, p); } diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 629e7abdd84..6e5c2534f4b 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -86,19 +86,15 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } /* Check for the current buffer head overflowing. */ if (unlikely(file_ofs + bh->b_size > init_size)) { - u8 *kaddr; int ofs; ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; local_irq_save(flags); - kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); - memset(kaddr + bh_offset(bh) + ofs, 0, - bh->b_size - ofs); - kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + zero_user_page(page, bh_offset(bh) + ofs, + bh->b_size - ofs, KM_BIO_SRC_IRQ); local_irq_restore(flags); - flush_dcache_page(page); } } else { clear_buffer_uptodate(bh); @@ -245,8 +241,7 @@ static int ntfs_read_block(struct page *page) rl = NULL; nr = i = 0; do { - u8 *kaddr; - int err; + int err = 0; if (unlikely(buffer_uptodate(bh))) continue; @@ -254,7 +249,6 @@ static int ntfs_read_block(struct page *page) arr[nr++] = bh; continue; } - err = 0; bh->b_bdev = vol->sb->s_bdev; /* Is the block within the allowed limits? */ if (iblock < lblock) { @@ -340,10 +334,7 @@ handle_hole: bh->b_blocknr = -1UL; clear_buffer_mapped(bh); handle_zblock: - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, i * blocksize, blocksize, KM_USER0); if (likely(!err)) set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); @@ -460,10 +451,7 @@ retry_readpage: * ok to ignore the compressed flag here. 
*/ if (unlikely(page->index > 0)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); goto done; } if (!NInoAttr(ni)) @@ -790,14 +778,10 @@ lock_retry_remap: * uptodate so it can get discarded by the VM. */ if (err == -ENOENT || lcn == LCN_ENOENT) { - u8 *kaddr; - bh->b_blocknr = -1; clear_buffer_dirty(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), blocksize, + KM_USER0); set_buffer_uptodate(bh); err = 0; continue; @@ -1422,10 +1406,8 @@ retry_writepage: if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ unsigned int ofs = i_size & ~PAGE_CACHE_MASK; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs, + KM_USER0); } /* Handle mst protected attributes. 
*/ if (NInoMstProtected(ni)) diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 74f99a6a369..34314b33dbd 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -20,7 +20,6 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include "dir.h" diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index dbbac559310..7ed56390b58 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -26,6 +26,7 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> +#include <linux/sched.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -606,11 +607,8 @@ do_next_page: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - u8 *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, - blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), + blocksize, KM_USER0); set_buffer_uptodate(bh); } } @@ -685,12 +683,9 @@ map_buffer_cached: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - u8 *kaddr = kmap_atomic(page, - KM_USER0); - memset(kaddr + bh_offset(bh), - 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, + bh_offset(bh), + blocksize, KM_USER0); set_buffer_uptodate(bh); } } @@ -708,11 +703,8 @@ map_buffer_cached: */ if (bh_end <= pos || bh_pos >= end) { if (!buffer_uptodate(bh)) { - u8 *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, - blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), + blocksize, KM_USER0); set_buffer_uptodate(bh); } mark_buffer_dirty(bh); @@ -751,10 +743,8 @@ map_buffer_cached: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - u8 *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), blocksize, + 
KM_USER0); set_buffer_uptodate(bh); } continue; @@ -878,11 +868,8 @@ rl_not_mapped_enoent: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - u8 *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, - blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), + blocksize, KM_USER0); set_buffer_uptodate(bh); } continue; @@ -1137,16 +1124,12 @@ rl_not_mapped_enoent: * to zero the overflowing region. */ if (unlikely(bh_pos + blocksize > initialized_size)) { - u8 *kaddr; int ofs = 0; if (likely(bh_pos < initialized_size)) ofs = initialized_size - bh_pos; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh) + ofs, 0, - blocksize - ofs); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh) + ofs, + blocksize - ofs, KM_USER0); } } else /* if (unlikely(!buffer_uptodate(bh))) */ err = -EIO; @@ -1286,11 +1269,8 @@ rl_not_mapped_enoent: if (PageUptodate(page)) set_buffer_uptodate(bh); else { - u8 *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, - blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, bh_offset(bh), + blocksize, KM_USER0); set_buffer_uptodate(bh); } } @@ -1350,9 +1330,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - kaddr = kmap_atomic(*pages, KM_USER0); - memset(kaddr, 0, len); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(*pages, 0, len, KM_USER0); } goto out; } @@ -1473,9 +1451,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - kaddr = kmap_atomic(*pages, KM_USER0); - memset(kaddr, 0, len); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(*pages, 0, len, KM_USER0); } goto out; } @@ -2129,28 +2105,13 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; loff_t pos; - unsigned long seg; 
size_t count; /* after file limit checks */ ssize_t written, err; count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (!seg) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; - } + err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); + if (err) + return err; pos = *ppos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim. */ diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index f8bf8da67ee..b532a730cec 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -27,7 +27,6 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include "aops.h" #include "attrib.h" @@ -141,7 +140,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) if (!ni->name) return -ENOMEM; memcpy(ni->name, na->name, i); - ni->name[i] = 0; + ni->name[na->name_len] = 0; } return 0; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 21d834e5ed7..4566b918255 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3085,8 +3085,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep, { ntfs_inode *ni = (ntfs_inode *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(VFS_I(ni)); + inode_init_once(VFS_I(ni)); } /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8e7cafb5fc6..a480b09c79b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -222,7 +222,10 @@ static int ocfs2_readpage(struct file *file, struct page *page) goto out; } - down_read(&OCFS2_I(inode)->ip_alloc_sem); + if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) { + ret = AOP_TRUNCATED_PAGE; + goto 
out_meta_unlock; + } /* * i_size might have just been updated as we grabed the meta lock. We @@ -235,10 +238,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { - char *addr = kmap(page); - memset(addr, 0, PAGE_SIZE); - flush_dcache_page(page); - kunmap(page); + zero_user_page(page, 0, PAGE_SIZE, KM_USER0); SetPageUptodate(page); ret = 0; goto out_alloc; @@ -258,6 +258,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) ocfs2_data_unlock(inode, 0); out_alloc: up_read(&OCFS2_I(inode)->ip_alloc_sem); +out_meta_unlock: ocfs2_meta_unlock(inode, 0); out: if (unlock) @@ -797,6 +798,11 @@ int ocfs2_map_and_write_splice_data(struct inode *inode, } to = from + bytes; + BUG_ON(from > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from < cluster_start); + BUG_ON(to > cluster_end); + if (wc->w_this_page_new) ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, cluster_start, cluster_end, 1); @@ -808,11 +814,6 @@ int ocfs2_map_and_write_splice_data(struct inode *inode, goto out; } - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from > osb->s_clustersize); - BUG_ON(to > osb->s_clustersize); - src = buf->ops->map(sp->s_pipe, buf, 1); dst = kmap_atomic(wc->w_this_page, KM_USER1); memcpy(dst + from, src + src_from, bytes); @@ -889,6 +890,11 @@ int ocfs2_map_and_write_user_data(struct inode *inode, to = from + bytes; + BUG_ON(from > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from < cluster_start); + BUG_ON(to > cluster_end); + if (wc->w_this_page_new) ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, cluster_start, cluster_end, 1); @@ -900,11 +906,6 @@ int ocfs2_map_and_write_user_data(struct inode *inode, goto out; } - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from > osb->s_clustersize); - BUG_ON(to > osb->s_clustersize); - dst = kmap(wc->w_this_page); 
memcpy(dst + from, bp->b_src_buf + src_from, bytes); kunmap(wc->w_this_page); diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 2e975c0a35e..2b205f5d579 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -157,7 +157,7 @@ int mlog_sys_init(struct kset *o2cb_subsys) } mlog_attr_ptrs[i] = NULL; - kobj_set_kset_s(&mlog_kset, o2cb_subsys); + kobj_set_kset_s(&mlog_kset, *o2cb_subsys); return kset_register(&mlog_kset); } diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index d4e46d067ed..fd8cb1badc9 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -42,7 +42,6 @@ #include <linux/highmem.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <asm/uaccess.h> @@ -263,12 +262,10 @@ static void dlmfs_init_once(void *foo, struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - ip->ip_dlm = NULL; - ip->ip_parent = NULL; + ip->ip_dlm = NULL; + ip->ip_parent = NULL; - inode_init_once(&ip->ip_vfs_inode); - } + inode_init_once(&ip->ip_vfs_inode); } static struct inode *dlmfs_alloc_inode(struct super_block *sb) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 024777abc8e..d1bd305ef0d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -27,7 +27,6 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/crc32.h> #include <linux/kthread.h> #include <linux/pagemap.h> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9395b4fa547..ac6c96431bb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -326,6 +326,7 @@ static int ocfs2_truncate_file(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)new_i_size); + unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); truncate_inode_pages(inode->i_mapping, new_i_size); fe = (struct ocfs2_dinode *) di_bh->b_data; @@ 
-1418,36 +1419,6 @@ out: return total ? total : ret; } -static int ocfs2_check_iovec(const struct iovec *iov, size_t *counted, - unsigned long *nr_segs) -{ - size_t ocount; /* original count */ - unsigned long seg; - - ocount = 0; - for (seg = 0; seg < *nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - ocount += iv->iov_len; - if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - *nr_segs = seg; - ocount -= iv->iov_len; /* This segment is no good */ - break; - } - - *counted = ocount; - return 0; -} - static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, @@ -1470,7 +1441,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (iocb->ki_left == 0) return 0; - ret = ocfs2_check_iovec(iov, &ocount, &nr_segs); + ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); if (ret) return ret; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index bc844bfe607..c53a6763bbb 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <asm/byteorder.h> diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 4dedd978910..545f7892cdf 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -471,9 +471,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, mutex_lock(&local_alloc_inode->i_mutex); - ac->ac_inode = local_alloc_inode; - ac->ac_which = OCFS2_AC_USE_LOCAL; - if (osb->local_alloc_state != OCFS2_LA_ENABLED) { status = -ENOSPC; goto bail; @@ -511,10 +508,14 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, } } + ac->ac_inode = local_alloc_inode; + ac->ac_which = OCFS2_AC_USE_LOCAL; 
get_bh(osb->local_alloc_bh); ac->ac_bh = osb->local_alloc_bh; status = 0; bail: + if (status < 0 && local_alloc_inode) + iput(local_alloc_inode); mlog_exit(status); return status; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d921a28329d..d8b79067dc1 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -26,7 +26,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/smp_lock.h> #define MLOG_MASK_PREFIX ML_SUPER #include <cluster/masklog.h> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7c5e3f5d663..86b559c7dce 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -937,31 +937,29 @@ static void ocfs2_inode_init_once(void *data, { struct ocfs2_inode_info *oi = data; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - oi->ip_flags = 0; - oi->ip_open_count = 0; - spin_lock_init(&oi->ip_lock); - ocfs2_extent_map_init(&oi->vfs_inode); - INIT_LIST_HEAD(&oi->ip_io_markers); - oi->ip_created_trans = 0; - oi->ip_last_trans = 0; - oi->ip_dir_start_lookup = 0; + oi->ip_flags = 0; + oi->ip_open_count = 0; + spin_lock_init(&oi->ip_lock); + ocfs2_extent_map_init(&oi->vfs_inode); + INIT_LIST_HEAD(&oi->ip_io_markers); + oi->ip_created_trans = 0; + oi->ip_last_trans = 0; + oi->ip_dir_start_lookup = 0; - init_rwsem(&oi->ip_alloc_sem); - mutex_init(&oi->ip_io_mutex); + init_rwsem(&oi->ip_alloc_sem); + mutex_init(&oi->ip_io_mutex); - oi->ip_blkno = 0ULL; - oi->ip_clusters = 0; + oi->ip_blkno = 0ULL; + oi->ip_clusters = 0; - ocfs2_lock_res_init_once(&oi->ip_rw_lockres); - ocfs2_lock_res_init_once(&oi->ip_meta_lockres); - ocfs2_lock_res_init_once(&oi->ip_data_lockres); - ocfs2_lock_res_init_once(&oi->ip_open_lockres); + ocfs2_lock_res_init_once(&oi->ip_rw_lockres); + ocfs2_lock_res_init_once(&oi->ip_meta_lockres); + ocfs2_lock_res_init_once(&oi->ip_data_lockres); + ocfs2_lock_res_init_once(&oi->ip_open_lockres); - ocfs2_metadata_cache_init(&oi->vfs_inode); + ocfs2_metadata_cache_init(&oi->vfs_inode); - 
inode_init_once(&oi->vfs_inode); - } + inode_init_once(&oi->vfs_inode); } static int ocfs2_initialize_mem_caches(void) diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 4f82a2f0efe..66a13ee63d4 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -26,7 +26,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/smp_lock.h> #include <linux/kthread.h> #include <cluster/heartbeat.h> diff --git a/fs/open.c b/fs/open.c index c989fb4cf7b..0d515d16197 100644 --- a/fs/open.c +++ b/fs/open.c @@ -7,7 +7,6 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> -#include <linux/smp_lock.h> #include <linux/quotaops.h> #include <linux/fsnotify.h> #include <linux/module.h> @@ -211,6 +210,9 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ATTR_FILE; } + /* Remove suid/sgid on truncate too */ + newattrs.ia_valid |= should_remove_suid(dentry); + mutex_lock(&dentry->d_inode->i_mutex); err = notify_change(dentry, &newattrs); mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 731a90e9f0c..e62397341c3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -419,8 +419,7 @@ static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned { struct op_inode_info *oi = (struct op_inode_info *) data; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&oi->vfs_inode); + inode_init_once(&oi->vfs_inode); } static int __init init_openprom_fs(void) diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 6e8bb66fe61..a99acd8de35 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -166,8 +166,12 @@ config LDM_PARTITION depends on PARTITION_ADVANCED ---help--- Say Y here if you would like to use hard disks under Linux which - were partitioned using Windows 2000's or XP's Logical Disk Manager. - They are also known as "Dynamic Disks". 
+ were partitioned using Windows 2000's/XP's or Vista's Logical Disk + Manager. They are also known as "Dynamic Disks". + + Note this driver only supports Dynamic Disks with a protective MBR + label, i.e. DOS partition table. It does not support GPT labelled + Dynamic Disks yet as can be created with Vista. Windows 2000 introduced the concept of Dynamic Disks to get around the limitations of the PC's partitioning scheme. The Logical Disk @@ -175,8 +179,8 @@ config LDM_PARTITION mirrored, striped or RAID volumes, all without the need for rebooting. - Normal partitions are now called Basic Disks under Windows 2000 and - XP. + Normal partitions are now called Basic Disks under Windows 2000, XP, + and Vista. For a fuller description read <file:Documentation/ldm.txt>. @@ -236,3 +240,12 @@ config EFI_PARTITION help Say Y here if you would like to use hard disks under Linux which were partitioned using EFI GPT. + +config SYSV68_PARTITION + bool "SYSV68 partition table support" if PARTITION_ADVANCED + default y if VME + help + Say Y here if you would like to be able to read the hard disk + partition table format used by Motorola Delta machines (using + sysv68). + Otherwise, say N. 
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index 67e665fdb7f..03af8eac51d 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o obj-$(CONFIG_IBM_PARTITION) += ibm.o obj-$(CONFIG_EFI_PARTITION) += efi.o obj-$(CONFIG_KARMA_PARTITION) += karma.o +obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6b9dae3f0e6..9a3a058f355 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -34,6 +34,7 @@ #include "ultrix.h" #include "efi.h" #include "karma.h" +#include "sysv68.h" #ifdef CONFIG_BLK_DEV_MD extern void md_autodetect_dev(dev_t dev); @@ -105,6 +106,9 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = #ifdef CONFIG_KARMA_PARTITION karma_partition, #endif +#ifdef CONFIG_SYSV68_PARTITION + sysv68_partition, +#endif NULL }; diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 1bea610078b..e7b07006bc4 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -152,7 +152,7 @@ last_lba(struct block_device *bdev) } static inline int -pmbr_part_valid(struct partition *part, u64 lastlba) +pmbr_part_valid(struct partition *part) { if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT && le32_to_cpu(part->start_sect) == 1UL) @@ -163,7 +163,6 @@ pmbr_part_valid(struct partition *part, u64 lastlba) /** * is_pmbr_valid(): test Protective MBR for validity * @mbr: pointer to a legacy mbr structure - * @lastlba: last_lba for the whole device * * Description: Returns 1 if PMBR is valid, 0 otherwise. 
* Validity depends on two things: @@ -171,13 +170,13 @@ pmbr_part_valid(struct partition *part, u64 lastlba) * 2) One partition of type 0xEE is found */ static int -is_pmbr_valid(legacy_mbr *mbr, u64 lastlba) +is_pmbr_valid(legacy_mbr *mbr) { int i; if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) return 0; for (i = 0; i < 4; i++) - if (pmbr_part_valid(&mbr->partition_record[i], lastlba)) + if (pmbr_part_valid(&mbr->partition_record[i])) return 1; return 0; } @@ -516,7 +515,7 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; gpt_header *pgpt = NULL, *agpt = NULL; gpt_entry *pptes = NULL, *aptes = NULL; - legacy_mbr *legacymbr = NULL; + legacy_mbr *legacymbr; u64 lastlba; if (!bdev || !gpt || !ptes) return 0; @@ -528,9 +527,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) if (legacymbr) { read_lba(bdev, 0, (u8 *) legacymbr, sizeof (*legacymbr)); - good_pmbr = is_pmbr_valid(legacymbr, lastlba); + good_pmbr = is_pmbr_valid(legacymbr); kfree(legacymbr); - legacymbr=NULL; } if (!good_pmbr) goto fail; diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 1a60926a4cc..99873a2b4cb 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -2,10 +2,10 @@ * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * - * Documentation is available at http://linux-ntfs.sf.net/ldm + * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software @@ -62,7 +62,6 @@ static void _ldm_printk (const char *level, const char *function, printk ("%s%s(): %s\n", 
level, function, buf); } - /** * ldm_parse_hexbyte - Convert a ASCII hex number to a byte * @src: Pointer to at least 2 characters to convert. @@ -118,7 +117,6 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest) return true; } - /** * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure * @data: Raw database PRIVHEAD structure loaded from the device @@ -130,46 +128,48 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest) * Return: 'true' @ph contains the PRIVHEAD data * 'false' @ph contents are undefined */ -static bool ldm_parse_privhead (const u8 *data, struct privhead *ph) +static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) { - BUG_ON (!data || !ph); + bool is_vista = false; - if (MAGIC_PRIVHEAD != BE64 (data)) { - ldm_error ("Cannot find PRIVHEAD structure. LDM database is" + BUG_ON(!data || !ph); + if (MAGIC_PRIVHEAD != BE64(data)) { + ldm_error("Cannot find PRIVHEAD structure. LDM database is" " corrupt. Aborting."); return false; } - - ph->ver_major = BE16 (data + 0x000C); - ph->ver_minor = BE16 (data + 0x000E); - ph->logical_disk_start = BE64 (data + 0x011B); - ph->logical_disk_size = BE64 (data + 0x0123); - ph->config_start = BE64 (data + 0x012B); - ph->config_size = BE64 (data + 0x0133); - - if ((ph->ver_major != 2) || (ph->ver_minor != 11)) { - ldm_error ("Expected PRIVHEAD version %d.%d, got %d.%d." - " Aborting.", 2, 11, ph->ver_major, ph->ver_minor); + ph->ver_major = BE16(data + 0x000C); + ph->ver_minor = BE16(data + 0x000E); + ph->logical_disk_start = BE64(data + 0x011B); + ph->logical_disk_size = BE64(data + 0x0123); + ph->config_start = BE64(data + 0x012B); + ph->config_size = BE64(data + 0x0133); + /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ + if (ph->ver_major == 2 && ph->ver_minor == 12) + is_vista = true; + if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) { + ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d." 
+ " Aborting.", ph->ver_major, ph->ver_minor); return false; } + ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major, + ph->ver_minor, is_vista ? "Vista" : "2000/XP"); if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */ - /* Warn the user and continue, carefully */ - ldm_info ("Database is normally %u bytes, it claims to " + /* Warn the user and continue, carefully. */ + ldm_info("Database is normally %u bytes, it claims to " "be %llu bytes.", LDM_DB_SIZE, - (unsigned long long)ph->config_size ); + (unsigned long long)ph->config_size); } - if ((ph->logical_disk_size == 0) || - (ph->logical_disk_start + ph->logical_disk_size > ph->config_start)) { - ldm_error ("PRIVHEAD disk size doesn't match real disk size"); + if ((ph->logical_disk_size == 0) || (ph->logical_disk_start + + ph->logical_disk_size > ph->config_start)) { + ldm_error("PRIVHEAD disk size doesn't match real disk size"); return false; } - - if (!ldm_parse_guid (data + 0x0030, ph->disk_id)) { - ldm_error ("PRIVHEAD contains an invalid GUID."); + if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) { + ldm_error("PRIVHEAD contains an invalid GUID."); return false; } - - ldm_debug ("Parsed PRIVHEAD successfully."); + ldm_debug("Parsed PRIVHEAD successfully."); return true; } @@ -409,7 +409,7 @@ out: * Return: 'true' @toc1 contains validated TOCBLOCK info * 'false' @toc1 contents are undefined */ -static bool ldm_validate_tocblocks (struct block_device *bdev, +static bool ldm_validate_tocblocks(struct block_device *bdev, unsigned long base, struct ldmdb *ldb) { static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; @@ -417,54 +417,57 @@ static bool ldm_validate_tocblocks (struct block_device *bdev, struct privhead *ph; Sector sect; u8 *data; + int i, nr_tbs; bool result = false; - int i; - BUG_ON (!bdev || !ldb); - - ph = &ldb->ph; + BUG_ON(!bdev || !ldb); + ph = &ldb->ph; tb[0] = &ldb->toc; - tb[1] = kmalloc (sizeof (*tb[1]), GFP_KERNEL); - tb[2] = kmalloc (sizeof 
(*tb[2]), GFP_KERNEL); - tb[3] = kmalloc (sizeof (*tb[3]), GFP_KERNEL); - if (!tb[1] || !tb[2] || !tb[3]) { - ldm_crit ("Out of memory."); - goto out; + tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); + if (!tb[1]) { + ldm_crit("Out of memory."); + goto err; } - - for (i = 0; i < 4; i++) /* Read and parse all four toc's. */ - { - data = read_dev_sector (bdev, base + off[i], §); + tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1])); + tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2])); + /* + * Try to read and parse all four TOCBLOCKs. + * + * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so + * skip any that fail as long as we get at least one valid TOCBLOCK. + */ + for (nr_tbs = i = 0; i < 4; i++) { + data = read_dev_sector(bdev, base + off[i], §); if (!data) { - ldm_crit ("Disk read failed."); - goto out; + ldm_error("Disk read failed for TOCBLOCK %d.", i); + continue; } - result = ldm_parse_tocblock (data, tb[i]); - put_dev_sector (sect); - if (!result) - goto out; /* Already logged */ + if (ldm_parse_tocblock(data, tb[nr_tbs])) + nr_tbs++; + put_dev_sector(sect); } - - /* Range check the toc against a privhead. */ + if (!nr_tbs) { + ldm_crit("Failed to find a valid TOCBLOCK."); + goto err; + } + /* Range check the TOCBLOCK against a privhead. */ if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) || - ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > ph->config_size)) { - ldm_crit ("The bitmaps are out of range. Giving up."); - goto out; + ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > + ph->config_size)) { + ldm_crit("The bitmaps are out of range. Giving up."); + goto err; } - - if (!ldm_compare_tocblocks (tb[0], tb[1]) || /* Compare all tocs. */ - !ldm_compare_tocblocks (tb[0], tb[2]) || - !ldm_compare_tocblocks (tb[0], tb[3])) { - ldm_crit ("The TOCBLOCKs don't match."); - goto out; + /* Compare all loaded TOCBLOCKs. 
*/ + for (i = 1; i < nr_tbs; i++) { + if (!ldm_compare_tocblocks(tb[0], tb[i])) { + ldm_crit("TOCBLOCKs 0 and %d do not match.", i); + goto err; + } } - - ldm_debug ("Validated TOCBLOCKs successfully."); + ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs); result = true; -out: - kfree (tb[1]); - kfree (tb[2]); - kfree (tb[3]); +err: + kfree(tb[1]); return result; } @@ -566,7 +569,7 @@ static bool ldm_validate_partition_table (struct block_device *bdev) p = (struct partition*)(data + 0x01BE); for (i = 0; i < 4; i++, p++) - if (SYS_IND (p) == WIN2K_DYNAMIC_PARTITION) { + if (SYS_IND (p) == LDM_PARTITION) { result = true; break; } @@ -975,44 +978,68 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) * Return: 'true' @vb contains a Partition VBLK * 'false' @vb contents are not defined */ -static bool ldm_parse_prt3 (const u8 *buffer, int buflen, struct vblk *vb) +static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len; struct vblk_part *part; - BUG_ON (!buffer || !vb); - - r_objid = ldm_relative (buffer, buflen, 0x18, 0); - r_name = ldm_relative (buffer, buflen, 0x18, r_objid); - r_size = ldm_relative (buffer, buflen, 0x34, r_name); - r_parent = ldm_relative (buffer, buflen, 0x34, r_size); - r_diskid = ldm_relative (buffer, buflen, 0x34, r_parent); - + BUG_ON(!buffer || !vb); + r_objid = ldm_relative(buffer, buflen, 0x18, 0); + if (r_objid < 0) { + ldm_error("r_objid %d < 0", r_objid); + return false; + } + r_name = ldm_relative(buffer, buflen, 0x18, r_objid); + if (r_name < 0) { + ldm_error("r_name %d < 0", r_name); + return false; + } + r_size = ldm_relative(buffer, buflen, 0x34, r_name); + if (r_size < 0) { + ldm_error("r_size %d < 0", r_size); + return false; + } + r_parent = ldm_relative(buffer, buflen, 0x34, r_size); + if (r_parent < 0) { + ldm_error("r_parent %d < 0", r_parent); + return false; + } + r_diskid = ldm_relative(buffer, buflen, 0x34, 
r_parent); + if (r_diskid < 0) { + ldm_error("r_diskid %d < 0", r_diskid); + return false; + } if (buffer[0x12] & VBLK_FLAG_PART_INDEX) { - r_index = ldm_relative (buffer, buflen, 0x34, r_diskid); + r_index = ldm_relative(buffer, buflen, 0x34, r_diskid); + if (r_index < 0) { + ldm_error("r_index %d < 0", r_index); + return false; + } len = r_index; } else { r_index = 0; len = r_diskid; } - if (len < 0) + if (len < 0) { + ldm_error("len %d < 0", len); return false; - + } len += VBLK_SIZE_PRT3; - if (len != BE32 (buffer + 0x14)) + if (len > BE32(buffer + 0x14)) { + ldm_error("len %d > BE32(buffer + 0x14) %d", len, + BE32(buffer + 0x14)); return false; - + } part = &vb->vblk.part; - part->start = BE64 (buffer + 0x24 + r_name); - part->volume_offset = BE64 (buffer + 0x2C + r_name); - part->size = ldm_get_vnum (buffer + 0x34 + r_name); - part->parent_id = ldm_get_vnum (buffer + 0x34 + r_size); - part->disk_id = ldm_get_vnum (buffer + 0x34 + r_parent); + part->start = BE64(buffer + 0x24 + r_name); + part->volume_offset = BE64(buffer + 0x2C + r_name); + part->size = ldm_get_vnum(buffer + 0x34 + r_name); + part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); + part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); if (vb->flags & VBLK_FLAG_PART_INDEX) part->partnum = buffer[0x35 + r_diskid]; else part->partnum = 0; - return true; } @@ -1475,4 +1502,3 @@ out: kfree (ldb); return result; } - diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 6e8d7952b8b..d2e6a304693 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -2,10 +2,10 @@ * ldm - Part of the Linux-NTFS project. 
* * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> - * Copyright (C) 2001 Anton Altaparmakov <aia21@cantab.net> + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * - * Documentation is available at http://linux-ntfs.sf.net/ldm + * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -93,7 +93,7 @@ struct parsed_partitions; #define OFF_VMDB 17 /* List of partitions. */ -#define WIN2K_DYNAMIC_PARTITION 0x42 /* Formerly SFS (Landis). */ +#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */ #define TOC_BITMAP1 "config" /* Names of the two defined */ #define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c new file mode 100644 index 00000000000..4eba27b7864 --- /dev/null +++ b/fs/partitions/sysv68.c @@ -0,0 +1,92 @@ +/* + * fs/partitions/sysv68.c + * + * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be> + */ + +#include "check.h" +#include "sysv68.h" + +/* + * Volume ID structure: on first 256-bytes sector of disk + */ + +struct volumeid { + u8 vid_unused[248]; + u8 vid_mac[8]; /* ASCII string "MOTOROLA" */ +}; + +/* + * config block: second 256-bytes sector on disk + */ + +struct dkconfig { + u8 ios_unused0[128]; + __be32 ios_slcblk; /* Slice table block number */ + __be16 ios_slccnt; /* Number of entries in slice table */ + u8 ios_unused1[122]; +}; + +/* + * combined volumeid and dkconfig block + */ + +struct dkblk0 { + struct volumeid dk_vid; + struct dkconfig dk_ios; +}; + +/* + * Slice Table Structure + */ + +struct slice { + __be32 nblocks; /* slice size (in blocks) */ + __be32 blkoff; /* block offset of slice */ +}; + + +int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) +{ + int i, slices; + int slot = 1; + Sector sect; + 
unsigned char *data; + struct dkblk0 *b; + struct slice *slice; + + data = read_dev_sector(bdev, 0, §); + if (!data) + return -1; + + b = (struct dkblk0 *)data; + if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) { + put_dev_sector(sect); + return 0; + } + slices = be16_to_cpu(b->dk_ios.ios_slccnt); + i = be32_to_cpu(b->dk_ios.ios_slcblk); + put_dev_sector(sect); + + data = read_dev_sector(bdev, i, §); + if (!data) + return -1; + + slices -= 1; /* last slice is the whole disk */ + printk("sysV68: %s(s%u)", state->name, slices); + slice = (struct slice *)data; + for (i = 0; i < slices; i++, slice++) { + if (slot == state->limit) + break; + if (be32_to_cpu(slice->nblocks)) { + put_partition(state, slot, + be32_to_cpu(slice->blkoff), + be32_to_cpu(slice->nblocks)); + printk("(s%u)", i); + } + slot++; + } + printk("\n"); + put_dev_sector(sect); + return 1; +} diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h new file mode 100644 index 00000000000..fa733f68431 --- /dev/null +++ b/fs/partitions/sysv68.h @@ -0,0 +1 @@ +extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev); diff --git a/fs/pipe.c b/fs/pipe.c index ebafde7d6ab..3a89592bdf5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -841,8 +841,18 @@ static int pipefs_delete_dentry(struct dentry *dentry) return 0; } +/* + * pipefs_dname() is called from d_path(). 
+ */ +static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) +{ + return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", + dentry->d_inode->i_ino); +} + static struct dentry_operations pipefs_dentry_operations = { .d_delete = pipefs_delete_dentry, + .d_dname = pipefs_dname, }; static struct inode * get_pipe_inode(void) @@ -888,8 +898,7 @@ struct file *create_write_pipe(void) struct inode *inode; struct file *f; struct dentry *dentry; - char name[32]; - struct qstr this; + struct qstr name = { .name = "" }; f = get_empty_filp(); if (!f) @@ -899,11 +908,8 @@ struct file *create_write_pipe(void) if (!inode) goto err_file; - this.len = sprintf(name, "[%lu]", inode->i_ino); - this.name = name; - this.hash = 0; err = -ENOMEM; - dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); + dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); if (!dentry) goto err_inode; diff --git a/fs/pnode.c b/fs/pnode.c index 56aacead836..89940f243fc 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -59,7 +59,7 @@ static int do_make_slave(struct vfsmount *mnt) } else { struct list_head *p = &mnt->mnt_slave_list; while (!list_empty(p)) { - slave_mnt = list_entry(p->next, + slave_mnt = list_first_entry(p, struct vfsmount, mnt_slave); list_del_init(&slave_mnt->mnt_slave); slave_mnt->mnt_master = NULL; diff --git a/fs/proc/array.c b/fs/proc/array.c index 07c9cdbcdca..74f30e0c038 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -410,9 +410,9 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); - res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", task->pid, tcomm, state, diff --git a/fs/proc/base.c b/fs/proc/base.c index 
ec158dd02b3..a5fa1fdafc4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -61,9 +61,9 @@ #include <linux/namei.h> #include <linux/mnt_namespace.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> +#include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> @@ -90,8 +90,8 @@ #define PROC_NUMBUF 13 struct pid_entry { - int len; char *name; + int len; mode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -99,8 +99,8 @@ struct pid_entry { }; #define NOD(NAME, MODE, IOP, FOP, OP) { \ - .len = sizeof(NAME) - 1, \ .name = (NAME), \ + .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ @@ -123,6 +123,9 @@ struct pid_entry { NULL, &proc_info_file_operations, \ { .proc_read = &proc_##OTYPE } ) +int maps_protect; +EXPORT_SYMBOL(maps_protect); + static struct fs_struct *get_fs_struct(struct task_struct *task) { struct fs_struct *fs; @@ -275,17 +278,15 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) */ static int proc_pid_wchan(struct task_struct *task, char *buffer) { - char *modname; - const char *sym_name; - unsigned long wchan, size, offset; - char namebuf[KSYM_NAME_LEN+1]; + unsigned long wchan; + char symname[KSYM_NAME_LEN+1]; wchan = get_wchan(task); - sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); - if (sym_name) - return sprintf(buffer, "%s", sym_name); - return sprintf(buffer, "%lu", wchan); + if (lookup_symbol_name(wchan, symname) < 0) + return sprintf(buffer, "%lu", wchan); + else + return sprintf(buffer, "%s", symname); } #endif /* CONFIG_KALLSYMS */ @@ -310,7 +311,9 @@ static int proc_oom_score(struct task_struct *task, char *buffer) struct timespec uptime; do_posix_clock_monotonic_gettime(&uptime); + read_lock(&tasklist_lock); points = badness(task, uptime.tv_sec); + read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } @@ -344,11 +347,8 @@ static 
int proc_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) { - error = security_inode_setattr(dentry, attr); - if (!error) - error = inode_setattr(inode, attr); - } + if (!error) + error = inode_setattr(inode, attr); return error; } @@ -660,7 +660,6 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, char buffer[PROC_NUMBUF]; size_t len; int oom_adjust; - loff_t __ppos = *ppos; if (!task) return -ESRCH; @@ -668,14 +667,8 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - if (__ppos >= len) - return 0; - if (count > len-__ppos) - count = len-__ppos; - if (copy_to_user(buf, buffer + __ppos, count)) - return -EFAULT; - *ppos = __ppos + count; - return count; + + return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t oom_adjust_write(struct file *file, const char __user *buf, @@ -715,6 +708,7 @@ static const struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; +#ifdef CONFIG_MMU static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -748,6 +742,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, static struct file_operations proc_clear_refs_operations = { .write = clear_refs_write, }; +#endif #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 @@ -823,7 +818,6 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, { struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); char __buf[20]; - loff_t __ppos = *ppos; size_t len; if (!tsk) @@ -831,14 +825,8 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, /* no need to print the trailing zero, so use only len */ len = sprintf(__buf, "%u\n", tsk->seccomp.mode); put_task_struct(tsk); - if (__ppos >= len) - return 0; - if (count > len - __ppos) - count = len - __ppos; 
- if (copy_to_user(buf, __buf + __ppos, count)) - return -EFAULT; - *ppos = __ppos + count; - return count; + + return simple_read_from_buffer(buf, count, ppos, __buf, len); } static ssize_t seccomp_write(struct file *file, const char __user *buf, @@ -897,7 +885,6 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; - loff_t __ppos = *ppos; if (!task) return -ESRCH; @@ -905,14 +892,8 @@ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); - if (__ppos >= len) - return 0; - if (count > len-__ppos) - count = len-__ppos; - if (copy_to_user(buf, buffer + __ppos, count)) - return -EFAULT; - *ppos = __ppos + count; - return count; + + return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t proc_fault_inject_write(struct file * file, @@ -975,7 +956,7 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, if (!tmp) return -ENOMEM; - + inode = dentry->d_inode; path = d_path(dentry, mnt, tmp, PAGE_SIZE); len = PTR_ERR(path); @@ -1155,7 +1136,8 @@ static struct dentry_operations pid_dentry_operations = /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *); +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, + struct task_struct *, const void *); /* * Fill a directory entry. 
@@ -1171,7 +1153,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct tas */ static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, char *name, int len, - instantiate_t instantiate, struct task_struct *task, void *ptr) + instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; @@ -1233,7 +1215,10 @@ out: return ~0U; } -static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +#define PROC_FDINFO_MAX 64 + +static int proc_fd_info(struct inode *inode, struct dentry **dentry, + struct vfsmount **mnt, char *info) { struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; @@ -1252,8 +1237,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - *mnt = mntget(file->f_path.mnt); - *dentry = dget(file->f_path.dentry); + if (mnt) + *mnt = mntget(file->f_path.mnt); + if (dentry) + *dentry = dget(file->f_path.dentry); + if (info) + snprintf(info, PROC_FDINFO_MAX, + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, + file->f_flags); spin_unlock(&files->file_lock); put_files_struct(files); return 0; @@ -1264,6 +1257,12 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm return -ENOENT; } +static int proc_fd_link(struct inode *inode, struct dentry **dentry, + struct vfsmount **mnt) +{ + return proc_fd_info(inode, dentry, mnt, NULL); +} + static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -1306,9 +1305,9 @@ static struct dentry_operations tid_fd_dentry_operations = }; static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd 
= *(unsigned *)ptr; + unsigned fd = *(const unsigned *)ptr; struct file *file; struct files_struct *files; struct inode *inode; @@ -1359,7 +1358,9 @@ out_iput: goto out; } -static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_lookupfd_common(struct inode *dir, + struct dentry *dentry, + instantiate_t instantiate) { struct task_struct *task = get_proc_task(dir); unsigned fd = name_to_int(dentry); @@ -1370,23 +1371,15 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (fd == ~0U) goto out; - result = proc_fd_instantiate(dir, dentry, task, &fd); + result = instantiate(dir, dentry, task, &fd); out: put_task_struct(task); out_no_task: return result; } -static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int fd) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", fd); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_fd_instantiate, task, &fd); -} - -static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +static int proc_readfd_common(struct file * filp, void * dirent, + filldir_t filldir, instantiate_t instantiate) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -1422,12 +1415,17 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) for (fd = filp->f_pos-2; fd < fdt->max_fds; fd++, filp->f_pos++) { + char name[PROC_NUMBUF]; + int len; if (!fcheck_files(files, fd)) continue; rcu_read_unlock(); - if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) { + len = snprintf(name, sizeof(name), "%d", fd); + if (proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, + p, &fd) < 0) { rcu_read_lock(); break; } @@ -1442,23 +1440,119 @@ out_no_task: return retval; } +static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + 
return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); +} + +static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); +} + +static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[PROC_FDINFO_MAX]; + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp); + if (!err) + err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); + return err; +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = nonseekable_open, + .read = proc_fdinfo_read, +}; + static const struct file_operations proc_fd_operations = { .read = generic_read_dir, .readdir = proc_readfd, }; /* + * /proc/pid/fd needs a special permission handler so that a process can still + * access /proc/self/fd after it has executed a setuid(). + */ +static int proc_fd_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + int rv; + + rv = generic_permission(inode, mask, NULL); + if (rv == 0) + return 0; + if (task_pid(current) == proc_pid(inode)) + rv = 0; + return rv; +} + +/* * proc directories can do almost nothing.. 
*/ static const struct inode_operations proc_fd_inode_operations = { .lookup = proc_lookupfd, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static struct dentry *proc_fdinfo_instantiate(struct inode *dir, + struct dentry *dentry, struct task_struct *task, const void *ptr) +{ + unsigned fd = *(unsigned *)ptr; + struct inode *inode; + struct proc_inode *ei; + struct dentry *error = ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + ei = PROC_I(inode); + ei->fd = fd; + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; + dentry->d_op = &tid_fd_dentry_operations; + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, NULL)) + error = NULL; + + out: + return error; +} + +static struct dentry *proc_lookupfdinfo(struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); +} + +static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, + proc_fdinfo_instantiate); +} + +static const struct file_operations proc_fdinfo_operations = { + .read = generic_read_dir, + .readdir = proc_readfdinfo, +}; + +/* + * proc directories can do almost nothing.. 
+ */ +static const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, .setattr = proc_setattr, }; + static struct dentry *proc_pident_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct pid_entry *p = ptr; + const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-EINVAL); @@ -1487,13 +1581,13 @@ out: static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, - struct pid_entry *ents, + const struct pid_entry *ents, unsigned int nents) { struct inode *inode; struct dentry *error; struct task_struct *task = get_proc_task(dir); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); inode = NULL; @@ -1522,8 +1616,8 @@ out_no_task: return error; } -static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, struct pid_entry *p) +static int proc_pident_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, const struct pid_entry *p) { return proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_pident_instantiate, task, p); @@ -1531,14 +1625,14 @@ static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t fil static int proc_pident_readdir(struct file *filp, void *dirent, filldir_t filldir, - struct pid_entry *ents, unsigned int nents) + const struct pid_entry *ents, unsigned int nents) { int i; int pid; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; ino_t ino; int ret; @@ -1653,7 +1747,7 @@ static const struct file_operations proc_pid_attr_operations = { .write = proc_pid_attr_write, }; -static struct pid_entry attr_dir_stuff[] = { +static 
const struct pid_entry attr_dir_stuff[] = { REG("current", S_IRUGO|S_IWUGO, pid_attr), REG("prev", S_IRUGO, pid_attr), REG("exec", S_IRUGO|S_IWUGO, pid_attr), @@ -1719,7 +1813,7 @@ static const struct inode_operations proc_self_inode_operations = { * that properly belong to the /proc filesystem, as they describe * describe something that is process related. */ -static struct pid_entry proc_base_stuff[] = { +static const struct pid_entry proc_base_stuff[] = { NOD("self", S_IFLNK|S_IRWXUGO, &proc_self_inode_operations, NULL, {}), }; @@ -1748,9 +1842,9 @@ static struct dentry_operations proc_base_dentry_operations = }; static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct pid_entry *p = ptr; + const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-EINVAL); @@ -1798,7 +1892,7 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) { struct dentry *error; struct task_struct *task = get_proc_task(dir); - struct pid_entry *p, *last; + const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); @@ -1824,8 +1918,8 @@ out_no_task: return error; } -static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, struct pid_entry *p) +static int proc_base_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, const struct pid_entry *p) { return proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_base_instantiate, task, p); @@ -1862,9 +1956,10 @@ static int proc_pid_io_accounting(struct task_struct *task, char *buffer) static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; -static struct pid_entry tgid_base_stuff[] = { +static const struct pid_entry tgid_base_stuff[] = { DIR("task", 
S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), + DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), INF("environ", S_IRUSR, pid_environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), @@ -2005,7 +2100,7 @@ out: static struct dentry *proc_pid_instantiate(struct inode *dir, struct dentry * dentry, - struct task_struct *task, void *ptr) + struct task_struct *task, const void *ptr) { struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; @@ -2018,7 +2113,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; + inode->i_nlink = 5; #ifdef CONFIG_SECURITY inode->i_nlink += 1; #endif @@ -2120,7 +2215,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) goto out_no_task; for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - struct pid_entry *p = &proc_base_stuff[nr]; + const struct pid_entry *p = &proc_base_stuff[nr]; if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) goto out; } @@ -2146,8 +2241,9 @@ out_no_task: /* * Tasks */ -static struct pid_entry tid_base_stuff[] = { +static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, fd), + DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), INF("environ", S_IRUSR, pid_environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), @@ -2216,7 +2312,7 @@ static const struct inode_operations proc_tid_base_inode_operations = { }; static struct dentry *proc_task_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, void *ptr) + struct dentry *dentry, struct task_struct *task, const void *ptr) { struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; @@ -2228,7 +2324,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; 
inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 3; + inode->i_nlink = 4; #ifdef CONFIG_SECURITY inode->i_nlink += 1; #endif diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 775fb21294d..8a40e15f5ec 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -398,6 +398,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; + de_get(de); spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); @@ -414,6 +415,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam d_add(dentry, inode); return NULL; } + de_put(de); return ERR_PTR(error); } @@ -476,14 +478,21 @@ int proc_readdir(struct file * filp, } do { + struct proc_dir_entry *next; + /* filldir passes info to user space */ + de_get(de); spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, - de->low_ino, de->mode >> 12) < 0) + de->low_ino, de->mode >> 12) < 0) { + de_put(de); goto out; + } spin_lock(&proc_subdir_lock); filp->f_pos++; - de = de->next; + next = de->next; + de_put(de); + de = next; } while (de); spin_unlock(&proc_subdir_lock); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 22b1158389a..d5ce65c68d7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -21,7 +21,7 @@ #include "internal.h" -static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) +struct proc_dir_entry *de_get(struct proc_dir_entry *de) { if (de) atomic_inc(&de->count); @@ -31,7 +31,7 @@ static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) /* * Decrements the use count and checks for deferred deletion. 
*/ -static void de_put(struct proc_dir_entry *de) +void de_put(struct proc_dir_entry *de) { if (de) { lock_kernel(); @@ -109,8 +109,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct proc_inode *ei = (struct proc_inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } int __init proc_init_inodecache(void) @@ -146,13 +145,6 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, { struct inode * inode; - /* - * Increment the use count so the dir entry can't disappear. - */ - de_get(de); - - WARN_ON(de && de->deleted); - if (de != NULL && !try_module_get(de->owner)) goto out_mod; @@ -184,7 +176,6 @@ out_ino: if (de != NULL) module_put(de->owner); out_mod: - de_put(de); return NULL; } @@ -199,6 +190,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent) s->s_op = &proc_sops; s->s_time_gran = 1; + de_get(&proc_root); root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); if (!root_inode) goto out_no_root; @@ -212,6 +204,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent) out_no_root: printk("proc_read_super: get root inode failed\n"); iput(root_inode); + de_put(&proc_root); return -ENOMEM; } MODULE_LICENSE("GPL"); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index f771889183c..b215c3524fa 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -37,6 +37,8 @@ do { \ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif +extern int maps_protect; + extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); extern int proc_tid_stat(struct task_struct *, char *); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 75ec6523d29..5fd49e47f83 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -35,7 +35,6 @@ #include <linux/signal.h> #include 
<linux/module.h> #include <linux/init.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/times.h> #include <linux/profile.h> @@ -429,18 +428,11 @@ static int slabstats_open(struct inode *inode, struct file *file) return ret; } -static int slabstats_release(struct inode *inode, struct file *file) -{ - struct seq_file *m = file->private_data; - kfree(m->private); - return seq_release(inode, file); -} - static const struct file_operations proc_slabstats_operations = { .open = slabstats_open, .read = seq_read, .llseek = seq_lseek, - .release = slabstats_release, + .release = seq_release_private, }; #endif #endif diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 20e8cbb3436..680c429bfa2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -429,11 +429,8 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) { - error = security_inode_setattr(dentry, attr); - if (!error) - error = inode_setattr(inode, attr); - } + if (!error) + error = inode_setattr(inode, attr); return error; } diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index c1bbfbeb035..b3a473b0a19 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -108,6 +108,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) { struct list_head *p; loff_t l = *pos; + + mutex_lock(&tty_mutex); list_for_each(p, &tty_drivers) if (!l--) return list_entry(p, struct tty_driver, tty_drivers); @@ -124,6 +126,7 @@ static void *t_next(struct seq_file *m, void *v, loff_t *pos) static void t_stop(struct seq_file *m, void *v) { + mutex_unlock(&tty_mutex); } static struct seq_operations tty_drivers_op = { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4008c060f7e..c24d81a5a04 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -3,6 +3,7 @@ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/highmem.h> +#include <linux/ptrace.h> #include 
<linux/pagemap.h> #include <linux/mempolicy.h> @@ -142,6 +143,9 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats dev_t dev = 0; int len; + if (maps_protect && !ptrace_may_attach(task)) + return -EACCES; + if (file) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; @@ -512,11 +516,22 @@ const struct file_operations proc_maps_operations = { #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); +static int show_numa_map_checked(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; + + if (maps_protect && !ptrace_may_attach(task)) + return -EACCES; + + return show_numa_map(m, v); +} + static struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_numa_map + .show = show_numa_map_checked }; static int numa_maps_open(struct inode *inode, struct file *file) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 7cddf6b8635..d8b8c7183c2 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include <linux/mm.h> #include <linux/file.h> #include <linux/mount.h> +#include <linux/ptrace.h> #include <linux/seq_file.h> #include "internal.h" @@ -143,6 +144,12 @@ out: static int show_map(struct seq_file *m, void *_vml) { struct vm_list_struct *vml = _vml; + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; + + if (maps_protect && !ptrace_may_attach(task)) + return -EACCES; + return nommu_vma_show(m, vml->vma); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 75fc8498f2e..8d256eb1181 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -536,8 +536,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int 
init_inodecache(void) diff --git a/fs/quota.c b/fs/quota.c index b9dae76a0b6..9f237d6182c 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -11,7 +11,6 @@ #include <asm/current.h> #include <asm/uaccess.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/buffer_head.h> @@ -158,7 +157,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t static void quota_sync_sb(struct super_block *sb, int type) { int cnt; - struct inode *discard[MAXQUOTAS]; sb->s_qcop->quota_sync(sb, type); /* This is not very clever (and fast) but currently I don't know about @@ -168,29 +166,21 @@ static void quota_sync_sb(struct super_block *sb, int type) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); - /* Now when everything is written we can discard the pagecache so - * that userspace sees the changes. We need i_mutex and so we could - * not do it inside dqonoff_mutex. Moreover we need to be carefull - * about races with quotaoff() (that is the reason why we have own - * reference to inode). */ + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. 
+ */ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - discard[cnt] = NULL; if (type != -1 && cnt != type) continue; if (!sb_has_quota_enabled(sb, cnt)) continue; - discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); + mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA); + truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); + mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); } mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (discard[cnt]) { - mutex_lock(&discard[cnt]->i_mutex); - truncate_inode_pages(&discard[cnt]->i_data, 0); - mutex_unlock(&discard[cnt]->i_mutex); - iput(discard[cnt]); - } - } } void sync_dquots(struct super_block *sb, int type) diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index d3fd7c6732d..5d258c40a2f 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -16,7 +16,6 @@ #include <linux/highmem.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/ramfs.h> #include <linux/quotaops.h> @@ -180,7 +179,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) return ret; } - ret = vmtruncate(inode, size); + ret = vmtruncate(inode, newsize); return ret; } @@ -196,6 +195,11 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) unsigned int old_ia_valid = ia->ia_valid; int ret = 0; + /* POSIX UID/GID verification for setting inode attributes */ + ret = inode_change_ok(inode, ia); + if (ret) + return ret; + /* by providing our own setattr() method, we skip this quotaism */ if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index ff1f7639707..d40d22b347b 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -30,10 +30,9 @@ #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> 
-#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/ramfs.h> - +#include <linux/sched.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/read_write.c b/fs/read_write.c index 1f8dc373ede..4d03008f015 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -37,10 +37,10 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&inode->i_mutex); switch (origin) { - case 2: + case SEEK_END: offset += inode->i_size; break; - case 1: + case SEEK_CUR: offset += file->f_pos; } retval = -EINVAL; @@ -63,10 +63,10 @@ loff_t remote_llseek(struct file *file, loff_t offset, int origin) lock_kernel(); switch (origin) { - case 2: + case SEEK_END: offset += i_size_read(file->f_path.dentry->d_inode); break; - case 1: + case SEEK_CUR: offset += file->f_pos; } retval = -EINVAL; @@ -94,10 +94,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) lock_kernel(); switch (origin) { - case 2: + case SEEK_END: offset += i_size_read(file->f_path.dentry->d_inode); break; - case 1: + case SEEK_CUR: offset += file->f_pos; } retval = -EINVAL; @@ -139,7 +139,7 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) goto bad; retval = -EINVAL; - if (origin <= 2) { + if (origin <= SEEK_MAX) { loff_t res = vfs_llseek(file, offset, origin); retval = res; if (res != (loff_t)retval) @@ -166,7 +166,7 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, goto bad; retval = -EINVAL; - if (origin > 2) + if (origin > SEEK_MAX) goto out_putf; offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, diff --git a/fs/readdir.c b/fs/readdir.c index f39f5b31325..efe52e67657 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -4,13 +4,13 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/file.h> -#include <linux/smp_lock.h> 
#include <linux/fs.h> #include <linux/dirent.h> #include <linux/security.h> @@ -52,7 +52,6 @@ EXPORT_SYMBOL(vfs_readdir); * case (the low-level handlers don't need to care about this). */ #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) -#define ROUND_UP(x) (((x)+sizeof(long)-1) & ~(sizeof(long)-1)) #ifdef __ARCH_WANT_OLD_READDIR @@ -147,7 +146,7 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, struct linux_dirent __user * dirent; struct getdents_callback * buf = (struct getdents_callback *) __buf; unsigned long d_ino; - int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2); + int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long)); buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) @@ -220,8 +219,6 @@ out: return error; } -#define ROUND_UP64(x) (((x)+sizeof(u64)-1) & ~(sizeof(u64)-1)) - struct getdents_callback64 { struct linux_dirent64 __user * current_dir; struct linux_dirent64 __user * previous; @@ -234,7 +231,7 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, { struct linux_dirent64 __user *dirent; struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; - int reclen = ROUND_UP64(NAME_OFFSET(dirent) + namlen + 1); + int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64)); buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > buf->count) diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 96a2f8889da..ffbfc2caaf2 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -7,11 +7,10 @@ #include <linux/fs.h> #include <linux/reiserfs_fs.h> #include <linux/stat.h> -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <asm/uaccess.h> -extern struct reiserfs_key MIN_KEY; +extern const struct reiserfs_key MIN_KEY; static int reiserfs_readdir(struct file *, void *, filldir_t); static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index abfada2f52d..9e451a68580 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -6,7 +6,6 @@ #include <linux/reiserfs_fs.h> #include <linux/reiserfs_acl.h> #include <linux/reiserfs_xattr.h> -#include <linux/smp_lock.h> #include <asm/uaccess.h> #include <linux/pagemap.h> #include <linux/swap.h> @@ -1060,20 +1059,12 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode maping blocks, since there is none, so we just zero out remaining parts of first and last pages in write area (if needed) */ if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { - if (from != 0) { /* First page needs to be partially zeroed */ - char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); - memset(kaddr, 0, from); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[0]); - } - if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */ - char *kaddr = - kmap_atomic(prepared_pages[num_pages - 1], - KM_USER0); - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[num_pages - 1]); - } + if (from != 0) /* First page needs to be partially zeroed */ + zero_user_page(prepared_pages[0], 0, from, KM_USER0); + + if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */ + zero_user_page(prepared_pages[num_pages-1], to, + PAGE_CACHE_SIZE - to, KM_USER0); /* 
Since all blocks are new - use already calculated value */ return blocks; @@ -1200,13 +1191,9 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode ll_rw_block(READ, 1, &bh); *wait_bh++ = bh; } else { /* Not mapped, zero it */ - char *kaddr = - kmap_atomic(prepared_pages[0], - KM_USER0); - memset(kaddr + block_start, 0, - from - block_start); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[0]); + zero_user_page(prepared_pages[0], + block_start, + from - block_start, KM_USER0); set_buffer_uptodate(bh); } } @@ -1238,13 +1225,8 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode ll_rw_block(READ, 1, &bh); *wait_bh++ = bh; } else { /* Not mapped, zero it */ - char *kaddr = - kmap_atomic(prepared_pages - [num_pages - 1], - KM_USER0); - memset(kaddr + to, 0, block_end - to); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[num_pages - 1]); + zero_user_page(prepared_pages[num_pages-1], + to, block_end - to, KM_USER0); set_buffer_uptodate(bh); } } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9fcbfe31697..1272d11399f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2148,13 +2148,8 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) length = offset & (blocksize - 1); /* if we are not on a block boundary */ if (length) { - char *kaddr; - length = blocksize - length; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); if (buffer_mapped(bh) && bh->b_blocknr != 0) { mark_buffer_dirty(bh); } @@ -2370,7 +2365,6 @@ static int reiserfs_write_full_page(struct page *page, ** last byte in the file */ if (page->index >= end_index) { - char *kaddr; unsigned last_offset; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); @@ -2379,10 +2373,7 @@ static int reiserfs_write_full_page(struct page *page, unlock_page(page); 
return 0; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); } bh = head; block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 7280a23ef34..f25086aeef5 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1110,7 +1110,7 @@ static int flush_commit_list(struct super_block *s, if (!barrier) { /* If there was a write error in the journal - we can't commit * this transaction - it will be invalid and, if successful, - * will just end up propogating the write error out to + * will just end up propagating the write error out to * the file system. */ if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { if (buffer_dirty(jl->j_commit_bh)) @@ -1125,7 +1125,7 @@ static int flush_commit_list(struct super_block *s, /* If there was a write error in the journal - we can't commit this * transaction - it will be invalid and, if successful, will just end - * up propogating the write error out to the filesystem. */ + * up propagating the write error out to the filesystem. 
*/ if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { #ifdef CONFIG_REISERFS_CHECK reiserfs_warning(s, "journal-615: buffer write failed"); @@ -2918,7 +2918,7 @@ static void queue_log_writer(struct super_block *s) set_current_state(TASK_UNINTERRUPTIBLE); if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&journal->j_join_wait, &wait); } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index a2161840bc7..b378eea332c 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -16,7 +16,6 @@ #include <linux/reiserfs_fs.h> #include <linux/reiserfs_acl.h> #include <linux/reiserfs_xattr.h> -#include <linux/smp_lock.h> #include <linux/quotaops.h> #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index ecc9943202f..9aa7a06e093 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -16,11 +16,10 @@ #include <asm/uaccess.h> #include <linux/reiserfs_fs.h> #include <linux/reiserfs_fs_sb.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/proc_fs.h> -#if defined( REISERFS_PROC_INFO ) +#ifdef CONFIG_REISERFS_PROC_INFO /* * LOCKING: diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 315684793d1..976cc7887a0 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -131,6 +131,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) /* don't use read_bitmap_block since it will cache * the uninitialized bitmap */ bh = sb_bread(s, i * s->s_blocksize * 8); + if (!bh) { + vfree(bitmap); + return -EIO; + } memset(bh->b_data, 0, sb_blocksize(sb)); reiserfs_test_and_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index afb21ea4530..b6f12593c39 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ 
-53,7 +53,6 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/reiserfs_fs.h> -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/quotaops.h> diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7054aaef049..b4ac9119200 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -18,7 +18,6 @@ #include <linux/reiserfs_fs.h> #include <linux/reiserfs_acl.h> #include <linux/reiserfs_xattr.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> @@ -433,12 +432,13 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { +#ifdef CONFIG_REISERFS_FS_XATTR if (REISERFS_SB(s)->xattr_root) { d_invalidate(REISERFS_SB(s)->xattr_root); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; } - +#endif if (REISERFS_SB(s)->priv_root) { d_invalidate(REISERFS_SB(s)->priv_root); dput(REISERFS_SB(s)->priv_root); @@ -511,14 +511,12 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_prealloc_list); - inode_init_once(&ei->vfs_inode); + INIT_LIST_HEAD(&ei->i_prealloc_list); + inode_init_once(&ei->vfs_inode); #ifdef CONFIG_REISERFS_FS_POSIX_ACL - ei->i_acl_access = NULL; - ei->i_acl_default = NULL; + ei->i_acl_access = NULL; + ei->i_acl_default = NULL; #endif - } } static int init_inodecache(void) @@ -1562,9 +1560,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) REISERFS_SB(s)->s_alloc_options.preallocmin = 0; /* Preallocate by 16 blocks (17-1) at once */ REISERFS_SB(s)->s_alloc_options.preallocsize = 17; +#ifdef CONFIG_REISERFS_FS_XATTR /* Initialize the rwsem for xattr dir */ init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); - +#endif /* setup default block allocator options */ 
reiserfs_init_alloc_options(s); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 80428519027..2284e03342c 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -566,12 +566,11 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { - struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; + struct romfs_inode_info *ei = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/select.c b/fs/select.c index fe0893afd93..a974082b082 100644 --- a/fs/select.c +++ b/fs/select.c @@ -14,10 +14,10 @@ * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). */ +#include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> @@ -26,7 +26,6 @@ #include <asm/uaccess.h> -#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) struct poll_table_page { @@ -65,7 +64,7 @@ EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) { - remove_wait_queue(entry->wait_address,&entry->wait); + remove_wait_queue(entry->wait_address, &entry->wait); fput(entry->filp); } @@ -129,7 +128,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(&entry->wait, current); - add_wait_queue(wait_address,&entry->wait); + add_wait_queue(wait_address, &entry->wait); } #define FDS_IN(fds, n) (fds->in + n) @@ -399,7 +398,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, if 
((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) timeout = -1; /* infinite */ else { - timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); + timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); timeout += tv.tv_sec * HZ; } } @@ -454,7 +453,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) timeout = -1; /* infinite */ else { - timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); + timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); timeout += ts.tv_sec * HZ; } } @@ -776,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) timeout = -1; /* infinite */ else { - timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); + timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); timeout += ts.tv_sec * HZ; } } diff --git a/fs/signalfd.c b/fs/signalfd.c new file mode 100644 index 00000000000..3b07f26d984 --- /dev/null +++ b/fs/signalfd.c @@ -0,0 +1,380 @@ +/* + * fs/signalfd.c + * + * Copyright (C) 2003 Linus Torvalds + * + * Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> + * Changed ->read() to return a siginfo strcture instead of signal number. + * Fixed locking in ->poll(). + * Added sighand-detach notification. + * Added fd re-use in sys_signalfd() syscall. + * Now using anonymous inode source. + * Thanks to Oleg Nesterov for useful code review and suggestions. + * More comments and suggestions from Arnd Bergmann. + * Sat May 19, 2007: Davi E. M. 
Arnaut <davi@haxent.com.br> + * Retrieve multiple signals with one read() call + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/list.h> +#include <linux/anon_inodes.h> +#include <linux/signalfd.h> + +struct signalfd_ctx { + struct list_head lnk; + wait_queue_head_t wqh; + sigset_t sigmask; + struct task_struct *tsk; +}; + +struct signalfd_lockctx { + struct task_struct *tsk; + unsigned long flags; +}; + +/* + * Tries to acquire the sighand lock. We do not increment the sighand + * use count, and we do not even pin the task struct, so we need to + * do it inside an RCU read lock, and we must be prepared for the + * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand + * being detached. We return 0 if the sighand has been detached, or + * 1 if we were able to pin the sighand lock. + */ +static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) +{ + struct sighand_struct *sighand = NULL; + + rcu_read_lock(); + lk->tsk = rcu_dereference(ctx->tsk); + if (likely(lk->tsk != NULL)) + sighand = lock_task_sighand(lk->tsk, &lk->flags); + rcu_read_unlock(); + + if (sighand && !ctx->tsk) { + unlock_task_sighand(lk->tsk, &lk->flags); + sighand = NULL; + } + + return sighand != NULL; +} + +static void signalfd_unlock(struct signalfd_lockctx *lk) +{ + unlock_task_sighand(lk->tsk, &lk->flags); +} + +/* + * This must be called with the sighand lock held. + */ +void signalfd_deliver(struct task_struct *tsk, int sig) +{ + struct sighand_struct *sighand = tsk->sighand; + struct signalfd_ctx *ctx, *tmp; + + BUG_ON(!sig); + list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { + /* + * We use a negative signal value as a way to broadcast that the + * sighand has been orphaned, so that we can notify all the + * listeners about this. 
Remember the ctx->sigmask is inverted, + * so if the user is interested in a signal, that corresponding + * bit will be zero. + */ + if (sig < 0) { + if (ctx->tsk == tsk) { + ctx->tsk = NULL; + list_del_init(&ctx->lnk); + wake_up(&ctx->wqh); + } + } else { + if (!sigismember(&ctx->sigmask, sig)) + wake_up(&ctx->wqh); + } + } +} + +static void signalfd_cleanup(struct signalfd_ctx *ctx) +{ + struct signalfd_lockctx lk; + + /* + * This is tricky. If the sighand is gone, we do not need to remove + * context from the list, the list itself won't be there anymore. + */ + if (signalfd_lock(ctx, &lk)) { + list_del(&ctx->lnk); + signalfd_unlock(&lk); + } + kfree(ctx); +} + +static int signalfd_release(struct inode *inode, struct file *file) +{ + signalfd_cleanup(file->private_data); + return 0; +} + +static unsigned int signalfd_poll(struct file *file, poll_table *wait) +{ + struct signalfd_ctx *ctx = file->private_data; + unsigned int events = 0; + struct signalfd_lockctx lk; + + poll_wait(file, &ctx->wqh, wait); + + /* + * Let the caller get a POLLIN in this case, ala socket recv() when + * the peer disconnects. + */ + if (signalfd_lock(ctx, &lk)) { + if ((lk.tsk == current && + next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || + next_signal(&lk.tsk->signal->shared_pending, + &ctx->sigmask) > 0) + events |= POLLIN; + signalfd_unlock(&lk); + } else + events |= POLLIN; + + return events; +} + +/* + * Copied from copy_siginfo_to_user() in kernel/signal.c + */ +static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, + siginfo_t const *kinfo) +{ + long err; + + BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); + + /* + * Unused memebers should be zero ... + */ + err = __clear_user(uinfo, sizeof(*uinfo)); + + /* + * If you change siginfo_t structure, please be sure + * this code is fixed accordingly. 
+ */ + err |= __put_user(kinfo->si_signo, &uinfo->signo); + err |= __put_user(kinfo->si_errno, &uinfo->err); + err |= __put_user((short)kinfo->si_code, &uinfo->code); + switch (kinfo->si_code & __SI_MASK) { + case __SI_KILL: + err |= __put_user(kinfo->si_pid, &uinfo->pid); + err |= __put_user(kinfo->si_uid, &uinfo->uid); + break; + case __SI_TIMER: + err |= __put_user(kinfo->si_tid, &uinfo->tid); + err |= __put_user(kinfo->si_overrun, &uinfo->overrun); + err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); + break; + case __SI_POLL: + err |= __put_user(kinfo->si_band, &uinfo->band); + err |= __put_user(kinfo->si_fd, &uinfo->fd); + break; + case __SI_FAULT: + err |= __put_user((long)kinfo->si_addr, &uinfo->addr); +#ifdef __ARCH_SI_TRAPNO + err |= __put_user(kinfo->si_trapno, &uinfo->trapno); +#endif + break; + case __SI_CHLD: + err |= __put_user(kinfo->si_pid, &uinfo->pid); + err |= __put_user(kinfo->si_uid, &uinfo->uid); + err |= __put_user(kinfo->si_status, &uinfo->status); + err |= __put_user(kinfo->si_utime, &uinfo->utime); + err |= __put_user(kinfo->si_stime, &uinfo->stime); + break; + case __SI_RT: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ: /* But this is */ + err |= __put_user(kinfo->si_pid, &uinfo->pid); + err |= __put_user(kinfo->si_uid, &uinfo->uid); + err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); + break; + default: /* this is just in case for now ... */ + err |= __put_user(kinfo->si_pid, &uinfo->pid); + err |= __put_user(kinfo->si_uid, &uinfo->uid); + break; + } + + return err ? 
-EFAULT: sizeof(*uinfo); +} + +static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, + int nonblock) +{ + ssize_t ret; + struct signalfd_lockctx lk; + DECLARE_WAITQUEUE(wait, current); + + if (!signalfd_lock(ctx, &lk)) + return 0; + + ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); + switch (ret) { + case 0: + if (!nonblock) + break; + ret = -EAGAIN; + default: + signalfd_unlock(&lk); + return ret; + } + + add_wait_queue(&ctx->wqh, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); + signalfd_unlock(&lk); + if (ret != 0) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + schedule(); + ret = signalfd_lock(ctx, &lk); + if (unlikely(!ret)) { + /* + * Let the caller read zero byte, ala socket + * recv() when the peer disconnect. This test + * must be done before doing a dequeue_signal(), + * because if the sighand has been orphaned, + * the dequeue_signal() call is going to crash + * because ->sighand will be long gone. + */ + break; + } + } + + remove_wait_queue(&ctx->wqh, &wait); + __set_current_state(TASK_RUNNING); + + return ret; +} + +/* + * Returns either the size of a "struct signalfd_siginfo", or zero if the + * sighand we are attached to, has been orphaned. The "count" parameter + * must be at least the size of a "struct signalfd_siginfo". 
+ */ +static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct signalfd_ctx *ctx = file->private_data; + struct signalfd_siginfo __user *siginfo; + int nonblock = file->f_flags & O_NONBLOCK; + ssize_t ret, total = 0; + siginfo_t info; + + count /= sizeof(struct signalfd_siginfo); + if (!count) + return -EINVAL; + + siginfo = (struct signalfd_siginfo __user *) buf; + + do { + ret = signalfd_dequeue(ctx, &info, nonblock); + if (unlikely(ret <= 0)) + break; + ret = signalfd_copyinfo(siginfo, &info); + if (ret < 0) + break; + siginfo++; + total += ret; + nonblock = 1; + } while (--count); + + return total ? total : ret; +} + +static const struct file_operations signalfd_fops = { + .release = signalfd_release, + .poll = signalfd_poll, + .read = signalfd_read, +}; + +/* + * Create a file descriptor that is associated with our signal + * state. We can pass it around to others if we want to, but + * it will always be _our_ signal state. + */ +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) +{ + int error; + sigset_t sigmask; + struct signalfd_ctx *ctx; + struct sighand_struct *sighand; + struct file *file; + struct inode *inode; + struct signalfd_lockctx lk; + + if (sizemask != sizeof(sigset_t) || + copy_from_user(&sigmask, user_mask, sizeof(sigmask))) + return error = -EINVAL; + sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(&sigmask); + + if (ufd == -1) { + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + init_waitqueue_head(&ctx->wqh); + ctx->sigmask = sigmask; + ctx->tsk = current; + + sighand = current->sighand; + /* + * Add this fd to the list of signal listeners. + */ + spin_lock_irq(&sighand->siglock); + list_add_tail(&ctx->lnk, &sighand->signalfd_list); + spin_unlock_irq(&sighand->siglock); + + /* + * When we call this, the initialization must be complete, since + * anon_inode_getfd() will install the fd. 
+ */ + error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", + &signalfd_fops, ctx); + if (error) + goto err_fdalloc; + } else { + file = fget(ufd); + if (!file) + return -EBADF; + ctx = file->private_data; + if (file->f_op != &signalfd_fops) { + fput(file); + return -EINVAL; + } + /* + * We need to be prepared of the fact that the sighand this fd + * is attached to, has been detched. In that case signalfd_lock() + * will return 0, and we'll just skip setting the new mask. + */ + if (signalfd_lock(ctx, &lk)) { + ctx->sigmask = sigmask; + signalfd_unlock(&lk); + } + wake_up(&ctx->wqh); + fput(file); + } + + return ufd; + +err_fdalloc: + signalfd_cleanup(ctx); + return error; +} + diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 50136b1a3ec..48da4fa6b7d 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/ctype.h> #include <linux/net.h> +#include <linux/sched.h> #include <linux/smb_fs.h> #include <linux/smb_mount.h> diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index f161797160c..aea3f8aa54c 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -17,6 +17,7 @@ #include <linux/pagemap.h> #include <linux/smp_lock.h> #include <linux/net.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 424a3ddf86d..6724a6cf01f 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -25,6 +25,7 @@ #include <linux/net.h> #include <linux/vfs.h> #include <linux/highuid.h> +#include <linux/sched.h> #include <linux/smb_fs.h> #include <linux/smbno.h> #include <linux/smb_mount.h> @@ -70,8 +71,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct smb_inode_info *ei = (struct smb_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 
723f7c66766..3f54a0f80fa 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -6,10 +6,12 @@ * Please add a note about your changes to smbfs in the ChangeLog file. */ +#include <linux/kernel.h> #include <linux/types.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/net.h> +#include <linux/sched.h> #include <linux/smb_fs.h> #include <linux/smbno.h> @@ -22,8 +24,6 @@ /* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */ #define SMB_SLAB_DEBUG 0 -#define ROUND_UP(x) (((x)+3) & ~3) - /* cache for request structures */ static struct kmem_cache *req_cachep; @@ -200,8 +200,8 @@ static int smb_setup_trans2request(struct smb_request *req) const int smb_parameters = 15; const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2; - const int oparam = ROUND_UP(header + 3); - const int odata = ROUND_UP(oparam + req->rq_lparm); + const int oparam = ALIGN(header + 3, sizeof(u32)); + const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32)); const int bcc = (req->rq_data ? 
odata + req->rq_ldata : oparam + req->rq_lparm) - header; diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 89eaf31f1d4..67176af8515 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -16,7 +16,6 @@ #include <linux/init.h> #include <linux/file.h> #include <linux/dcache.h> -#include <linux/smp_lock.h> #include <linux/module.h> #include <linux/net.h> #include <linux/kthread.h> @@ -299,8 +298,6 @@ out: */ static int smbiod(void *unused) { - allow_signal(SIGKILL); - VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid); for (;;) { diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index 92ea6b2367d..e48bd8235a8 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -17,7 +17,6 @@ #include <linux/net.h> #include <linux/mm.h> #include <linux/netdevice.h> -#include <linux/smp_lock.h> #include <linux/workqueue.h> #include <net/scm.h> #include <net/tcp_states.h> diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index fea20ceb8a5..00b2909bd46 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/net.h> #include <linux/namei.h> diff --git a/fs/splice.c b/fs/splice.c index 5428b0ff3b6..e7d7080de2f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -176,6 +176,7 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { + unsigned int spd_pages = spd->nr_pages; int ret, do_wakeup, page_nr; ret = 0; @@ -244,17 +245,18 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) + if (pipe->inode) { mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + 
wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } } - while (page_nr < spd->nr_pages) + while (page_nr < spd_pages) page_cache_release(spd->pages[page_nr++]); return ret; @@ -272,7 +274,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct page *page; pgoff_t index, end_index; loff_t isize; - size_t total_len; int error, page_nr; struct splice_pipe_desc spd = { .pages = pages, @@ -289,18 +290,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, nr_pages = PIPE_BUFFERS; /* - * Initiate read-ahead on this page range. however, don't call into - * read-ahead if this is a non-zero offset (we are likely doing small - * chunk splice and the page is already there) for a single page. + * Don't try to 2nd guess the read-ahead logic, call into + * page_cache_readahead() like the page cache reads would do. */ - if (!loff || nr_pages > 1) - page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); + page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); /* * Now fill in the holes: */ error = 0; - total_len = 0; /* * Lookup the (hopefully) full range of pages we need. @@ -378,10 +376,11 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * If in nonblock mode then dont block on waiting * for an in-flight io page */ - if (flags & SPLICE_F_NONBLOCK) - break; - - lock_page(page); + if (flags & SPLICE_F_NONBLOCK) { + if (TestSetPageLocked(page)) + break; + } else + lock_page(page); /* * page was truncated, stop here. if this isn't the @@ -416,43 +415,47 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; } + } +fill_it: + /* + * i_size must be checked after PageUptodate. 
+ */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) + break; + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + unsigned int plen; /* - * i_size must be checked after ->readpage(). + * max good bytes in this page */ - isize = i_size_read(mapping->host); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!isize || index > end_index)) + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) break; /* - * if this is the last page, see if we need to shrink - * the length and stop + * force quit after adding this page */ - if (end_index == index) { - loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); - if (total_len + loff > isize) - break; - /* - * force quit after adding this page - */ - len = this_len; - this_len = min(this_len, loff); - loff = 0; - } + this_len = min(this_len, plen - loff); + len = this_len; } -fill_it: + partial[page_nr].offset = loff; partial[page_nr].len = this_len; len -= this_len; - total_len += this_len; loff = 0; spd.nr_pages++; index++; } /* - * Release any pages at the end, if we quit early. 'i' is how far + * Release any pages at the end, if we quit early. 'page_nr' is how far * we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) @@ -479,10 +482,18 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { ssize_t spliced; int ret; + loff_t isize, left; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(*ppos >= isize)) + return 0; + + left = isize - *ppos; + if (unlikely(left < len)) + len = left; ret = 0; spliced = 0; - while (len) { ret = __generic_file_splice_read(in, ppos, pipe, len, flags); @@ -645,7 +656,6 @@ find_page: * accessed, we are now done! 
*/ mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); out: page_cache_release(page); unlock_page(page); @@ -803,7 +813,10 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { + unsigned long nr_pages; + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* * If file or inode is SYNC and we actually wrote some data, @@ -816,6 +829,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } return ret; @@ -854,7 +868,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { + unsigned long nr_pages; + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* * If file or inode is SYNC and we actually wrote some data, @@ -869,6 +886,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } return ret; @@ -923,7 +941,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - loff_t isize, left; int ret; if (unlikely(!in->f_op || !in->f_op->splice_read)) @@ -936,14 +953,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - isize = i_size_read(in->f_mapping->host); - if (unlikely(*ppos >= isize)) - return 0; - - left = isize - *ppos; - if (unlikely(left < len)) - len = left; - return in->f_op->splice_read(in, ppos, pipe, len, flags); } @@ -1059,8 +1068,6 @@ out_release: return ret; } -EXPORT_SYMBOL(do_splice_direct); - /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a diff --git 
a/fs/stat.c b/fs/stat.c index 38a8cb2a28d..68510068a64 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -8,7 +8,6 @@ #include <linux/mm.h> #include <linux/errno.h> #include <linux/file.h> -#include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/namei.h> diff --git a/fs/super.c b/fs/super.c index 8341e4e1d73..5260d620c55 100644 --- a/fs/super.c +++ b/fs/super.c @@ -107,6 +107,7 @@ out: static inline void destroy_super(struct super_block *s) { security_sb_free(s); + kfree(s->s_subtype); kfree(s); } @@ -907,6 +908,29 @@ out: EXPORT_SYMBOL_GPL(vfs_kern_mount); +static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) +{ + int err; + const char *subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + err = -EINVAL; + if (!subtype[0]) + goto err; + } else + subtype = ""; + + mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); + err = -ENOMEM; + if (!mnt->mnt_sb->s_subtype) + goto err; + return mnt; + + err: + mntput(mnt); + return ERR_PTR(err); +} + struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { @@ -915,6 +939,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) if (!type) return ERR_PTR(-ENODEV); mnt = vfs_kern_mount(type, flags, name, data); + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && + !mnt->mnt_sb->s_subtype) + mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); return mnt; } diff --git a/fs/sync.c b/fs/sync.c index 5cb9e7e4338..7cd005ea763 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -229,13 +229,21 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, !S_ISLNK(i_mode)) goto out_put; - ret = do_sync_file_range(file, offset, endbyte, flags); + ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); out_put: fput_light(file, fput_needed); out: return ret; } +/* It would be nice if people remember that not all the world's an i386 + when they introduce new system calls */ 
+asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, + loff_t offset, loff_t nbytes) +{ + return sys_sync_file_range(fd, offset, nbytes, flags); +} + /* * `endbyte' is inclusive */ diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 85a668680f8..c4342a01997 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -13,14 +13,26 @@ #include "sysfs.h" DECLARE_RWSEM(sysfs_rename_sem); +spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED; static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) { struct sysfs_dirent * sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); - sd->s_dentry = NULL; + /* sd->s_dentry is protected with sysfs_lock. This + * allows sysfs_drop_dentry() to dereference it. + */ + spin_lock(&sysfs_lock); + + /* The dentry might have been deleted or another + * lookup could have happened updating sd->s_dentry to + * point the new dentry. Ignore if it isn't pointing + * to this dentry. + */ + if (sd->s_dentry == dentry) + sd->s_dentry = NULL; + spin_unlock(&sysfs_lock); sysfs_put(sd); } iput(inode); @@ -30,6 +42,14 @@ static struct dentry_operations sysfs_dentry_ops = { .d_iput = sysfs_d_iput, }; +static unsigned int sysfs_inode_counter; +ino_t sysfs_get_inum(void) +{ + if (unlikely(sysfs_inode_counter < 3)) + sysfs_inode_counter = 3; + return sysfs_inode_counter++; +} + /* * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent */ @@ -41,6 +61,7 @@ static struct sysfs_dirent * __sysfs_new_dirent(void * element) if (!sd) return NULL; + sd->s_ino = sysfs_get_inum(); atomic_set(&sd->s_count, 1); atomic_set(&sd->s_event, 1); INIT_LIST_HEAD(&sd->s_children); @@ -238,7 +259,10 @@ static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) } dentry->d_fsdata = sysfs_get(sd); + /* protect sd->s_dentry against sysfs_d_iput */ + spin_lock(&sysfs_lock); sd->s_dentry = dentry; + spin_unlock(&sysfs_lock); error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); if (error) { sysfs_put(sd); 
@@ -260,7 +284,10 @@ static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) int err = 0; dentry->d_fsdata = sysfs_get(sd); + /* protect sd->s_dentry against sysfs_d_iput */ + spin_lock(&sysfs_lock); sd->s_dentry = dentry; + spin_unlock(&sysfs_lock); err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); if (!err) { dentry->d_op = &sysfs_dentry_ops; @@ -509,7 +536,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) switch (i) { case 0: - ino = dentry->d_inode->i_ino; + ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) break; filp->f_pos++; @@ -538,10 +565,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) name = sysfs_get_name(next); len = strlen(name); - if (next->s_dentry) - ino = next->s_dentry->d_inode->i_ino; - else - ino = iunique(sysfs_sb, 2); + ino = next->s_ino; if (filldir(dirent, name, len, filp->f_pos, ino, dt_type(next)) < 0) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 0e637adc2b8..b502c7197ec 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -111,36 +111,6 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer return ret; } - -/** - * flush_read_buffer - push buffer to userspace. - * @buffer: data buffer for file. - * @buf: user-passed buffer. - * @count: number of bytes requested. - * @ppos: file position. - * - * Copy the buffer we filled in fill_read_buffer() to userspace. - * This is done at the reader's leisure, copying and advancing - * the amount they specify each time. - * This may be called continuously until the buffer is empty. - */ -static int flush_read_buffer(struct sysfs_buffer * buffer, char __user * buf, - size_t count, loff_t * ppos) -{ - int error; - - if (*ppos > buffer->count) - return 0; - - if (count > (buffer->count - *ppos)) - count = buffer->count - *ppos; - - error = copy_to_user(buf,buffer->page + *ppos,count); - if (!error) - *ppos += count; - return error ? 
-EFAULT : count; -} - /** * sysfs_read_file - read an attribute. * @file: file pointer. @@ -177,7 +147,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", __FUNCTION__, count, *ppos, buffer->page); - retval = flush_read_buffer(buffer,buf,count,ppos); + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); out: up(&buffer->sem); return retval; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 4de5c6b8991..5266eec15f6 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -13,6 +13,7 @@ #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/errno.h> +#include <linux/sched.h> #include <asm/semaphore.h> #include "sysfs.h" @@ -140,6 +141,7 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; inode->i_op = &sysfs_inode_operations; + inode->i_ino = sd->s_ino; lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); if (sd->s_iattr) { @@ -244,13 +246,27 @@ static inline void orphan_all_buffers(struct inode *node) */ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) { - struct dentry * dentry = sd->s_dentry; + struct dentry *dentry = NULL; struct inode *inode; + /* We're not holding a reference to ->s_dentry dentry but the + * field will stay valid as long as sysfs_lock is held. 
+ */ + spin_lock(&sysfs_lock); + spin_lock(&dcache_lock); + + /* dget dentry if it's still alive */ + if (sd->s_dentry && sd->s_dentry->d_inode) + dentry = dget_locked(sd->s_dentry); + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_lock); + + /* drop dentry */ if (dentry) { spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry) && dentry->d_inode)) { + if (!d_unhashed(dentry) && dentry->d_inode) { inode = dentry->d_inode; spin_lock(&inode->i_lock); __iget(inode); @@ -266,6 +282,8 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } + + dput(dentry); } } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 23a48a38e6a..00ab9125d39 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -33,6 +33,7 @@ static struct sysfs_dirent sysfs_root = { .s_element = NULL, .s_type = SYSFS_ROOT, .s_iattr = NULL, + .s_ino = 1, }; static void sysfs_clear_inode(struct inode *inode) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a77c57e5a6d..502c949c402 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -5,6 +5,7 @@ struct sysfs_dirent { void * s_element; int s_type; umode_t s_mode; + ino_t s_ino; struct dentry * s_dentry; struct iattr * s_iattr; atomic_t s_event; @@ -32,6 +33,7 @@ extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); +extern spinlock_t sysfs_lock; extern struct rw_semaphore sysfs_rename_sem; extern struct super_block * sysfs_sb; extern const struct file_operations sysfs_dir_operations; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3152d741560..56441169339 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -322,8 +322,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; - if (flags & 
SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&si->vfs_inode); + inode_init_once(&si->vfs_inode); } const struct super_operations sysv_sops = { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 4e48abbd2b5..6bd850b7641 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -13,7 +13,6 @@ */ #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include "sysv.h" static int add_nondir(struct dentry *dentry, struct inode *inode) diff --git a/fs/timerfd.c b/fs/timerfd.c new file mode 100644 index 00000000000..af9eca5c023 --- /dev/null +++ b/fs/timerfd.c @@ -0,0 +1,225 @@ +/* + * fs/timerfd.c + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * + * + * Thanks to Thomas Gleixner for code reviews and useful comments. + * + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/time.h> +#include <linux/hrtimer.h> +#include <linux/anon_inodes.h> +#include <linux/timerfd.h> + +struct timerfd_ctx { + struct hrtimer tmr; + ktime_t tintv; + wait_queue_head_t wqh; + int expired; +}; + +/* + * This gets called when the timer event triggers. We set the "expired" + * flag, but we do not re-arm the timer (in case it's necessary, + * tintv.tv64 != 0) until the timer is read. + */ +static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) +{ + struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + ctx->expired = 1; + wake_up_locked(&ctx->wqh); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return HRTIMER_NORESTART; +} + +static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, + const struct itimerspec *ktmr) +{ + enum hrtimer_mode htmode; + ktime_t texp; + + htmode = (flags & TFD_TIMER_ABSTIME) ? 
+ HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + + texp = timespec_to_ktime(ktmr->it_value); + ctx->expired = 0; + ctx->tintv = timespec_to_ktime(ktmr->it_interval); + hrtimer_init(&ctx->tmr, clockid, htmode); + ctx->tmr.expires = texp; + ctx->tmr.function = timerfd_tmrproc; + if (texp.tv64 != 0) + hrtimer_start(&ctx->tmr, texp, htmode); +} + +static int timerfd_release(struct inode *inode, struct file *file) +{ + struct timerfd_ctx *ctx = file->private_data; + + hrtimer_cancel(&ctx->tmr); + kfree(ctx); + return 0; +} + +static unsigned int timerfd_poll(struct file *file, poll_table *wait) +{ + struct timerfd_ctx *ctx = file->private_data; + unsigned int events = 0; + unsigned long flags; + + poll_wait(file, &ctx->wqh, wait); + + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->expired) + events |= POLLIN; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return events; +} + +static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct timerfd_ctx *ctx = file->private_data; + ssize_t res; + u32 ticks = 0; + DECLARE_WAITQUEUE(wait, current); + + if (count < sizeof(ticks)) + return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); + res = -EAGAIN; + if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { + __add_wait_queue(&ctx->wqh, &wait); + for (res = 0;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (ctx->expired) { + res = 0; + break; + } + if (signal_pending(current)) { + res = -ERESTARTSYS; + break; + } + spin_unlock_irq(&ctx->wqh.lock); + schedule(); + spin_lock_irq(&ctx->wqh.lock); + } + __remove_wait_queue(&ctx->wqh, &wait); + __set_current_state(TASK_RUNNING); + } + if (ctx->expired) { + ctx->expired = 0; + if (ctx->tintv.tv64 != 0) { + /* + * If tintv.tv64 != 0, this is a periodic timer that + * needs to be re-armed. We avoid doing it in the timer + * callback to avoid DoS attacks specifying a very + * short timer period. 
+ */ + ticks = (u32) + hrtimer_forward(&ctx->tmr, + hrtimer_cb_get_time(&ctx->tmr), + ctx->tintv); + hrtimer_restart(&ctx->tmr); + } else + ticks = 1; + } + spin_unlock_irq(&ctx->wqh.lock); + if (ticks) + res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks); + return res; +} + +static const struct file_operations timerfd_fops = { + .release = timerfd_release, + .poll = timerfd_poll, + .read = timerfd_read, +}; + +asmlinkage long sys_timerfd(int ufd, int clockid, int flags, + const struct itimerspec __user *utmr) +{ + int error; + struct timerfd_ctx *ctx; + struct file *file; + struct inode *inode; + struct itimerspec ktmr; + + if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) + return -EFAULT; + + if (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME) + return -EINVAL; + if (!timespec_valid(&ktmr.it_value) || + !timespec_valid(&ktmr.it_interval)) + return -EINVAL; + + if (ufd == -1) { + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + init_waitqueue_head(&ctx->wqh); + + timerfd_setup(ctx, clockid, flags, &ktmr); + + /* + * When we call this, the initialization must be complete, since + * anon_inode_getfd() will install the fd. + */ + error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", + &timerfd_fops, ctx); + if (error) + goto err_tmrcancel; + } else { + file = fget(ufd); + if (!file) + return -EBADF; + ctx = file->private_data; + if (file->f_op != &timerfd_fops) { + fput(file); + return -EINVAL; + } + /* + * We need to stop the existing timer before reprogramming + * it to the new values. + */ + for (;;) { + spin_lock_irq(&ctx->wqh.lock); + if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) + break; + spin_unlock_irq(&ctx->wqh.lock); + cpu_relax(); + } + /* + * Re-program the timer to the new value ... 
+ */ + timerfd_setup(ctx, clockid, flags, &ktmr); + + spin_unlock_irq(&ctx->wqh.lock); + fput(file); + } + + return ufd; + +err_tmrcancel: + hrtimer_cancel(&ctx->tmr); + kfree(ctx); + return error; +} + diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index ea521f846d9..4cec9101568 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -427,9 +427,9 @@ static void udf_table_free_blocks(struct super_block * sb, { struct udf_sb_info *sbi = UDF_SB(sb); uint32_t start, end; - uint32_t nextoffset, oextoffset, elen; - kernel_lb_addr nbloc, obloc, eloc; - struct buffer_head *obh, *nbh; + uint32_t elen; + kernel_lb_addr eloc; + struct extent_position oepos, epos; int8_t etype; int i; @@ -457,14 +457,13 @@ static void udf_table_free_blocks(struct super_block * sb, start = bloc.logicalBlockNum + offset; end = bloc.logicalBlockNum + offset + count - 1; - oextoffset = nextoffset = sizeof(struct unallocSpaceEntry); + epos.offset = oepos.offset = sizeof(struct unallocSpaceEntry); elen = 0; - obloc = nbloc = UDF_I_LOCATION(table); - - obh = nbh = NULL; + epos.block = oepos.block = UDF_I_LOCATION(table); + epos.bh = oepos.bh = NULL; while (count && (etype = - udf_next_aext(table, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) != -1) + udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == start)) @@ -482,7 +481,7 @@ static void udf_table_free_blocks(struct super_block * sb, start += count; count = 0; } - udf_write_aext(table, obloc, &oextoffset, eloc, elen, obh, 1); + udf_write_aext(table, &oepos, eloc, elen, 1); } else if (eloc.logicalBlockNum == (end + 1)) { @@ -502,20 +501,20 @@ static void udf_table_free_blocks(struct super_block * sb, end -= count; count = 0; } - udf_write_aext(table, obloc, &oextoffset, eloc, elen, obh, 1); + udf_write_aext(table, &oepos, eloc, elen, 1); } - if (nbh != obh) + if (epos.bh != oepos.bh) { i = -1; - obloc = nbloc; - udf_release_data(obh); - atomic_inc(&nbh->b_count); - obh = nbh; - 
oextoffset = 0; + oepos.block = epos.block; + brelse(oepos.bh); + get_bh(epos.bh); + oepos.bh = epos.bh; + oepos.offset = 0; } else - oextoffset = nextoffset; + oepos.offset = epos.offset; } if (count) @@ -547,55 +546,53 @@ static void udf_table_free_blocks(struct super_block * sb, adsize = sizeof(long_ad); else { - udf_release_data(obh); - udf_release_data(nbh); + brelse(oepos.bh); + brelse(epos.bh); goto error_return; } - if (nextoffset + (2 * adsize) > sb->s_blocksize) + if (epos.offset + (2 * adsize) > sb->s_blocksize) { char *sptr, *dptr; int loffset; - udf_release_data(obh); - obh = nbh; - obloc = nbloc; - oextoffset = nextoffset; + brelse(oepos.bh); + oepos = epos; /* Steal a block from the extent being free'd */ - nbloc.logicalBlockNum = eloc.logicalBlockNum; + epos.block.logicalBlockNum = eloc.logicalBlockNum; eloc.logicalBlockNum ++; elen -= sb->s_blocksize; - if (!(nbh = udf_tread(sb, - udf_get_lb_pblock(sb, nbloc, 0)))) + if (!(epos.bh = udf_tread(sb, + udf_get_lb_pblock(sb, epos.block, 0)))) { - udf_release_data(obh); + brelse(oepos.bh); goto error_return; } - aed = (struct allocExtDesc *)(nbh->b_data); - aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); - if (nextoffset + adsize > sb->s_blocksize) + aed = (struct allocExtDesc *)(epos.bh->b_data); + aed->previousAllocExtLocation = cpu_to_le32(oepos.block.logicalBlockNum); + if (epos.offset + adsize > sb->s_blocksize) { - loffset = nextoffset; + loffset = epos.offset; aed->lengthAllocDescs = cpu_to_le32(adsize); - sptr = UDF_I_DATA(inode) + nextoffset - + sptr = UDF_I_DATA(inode) + epos.offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode) - adsize; - dptr = nbh->b_data + sizeof(struct allocExtDesc); + dptr = epos.bh->b_data + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); - nextoffset = sizeof(struct allocExtDesc) + adsize; + epos.offset = sizeof(struct allocExtDesc) + adsize; } else { - loffset = nextoffset + adsize; + loffset = epos.offset + adsize; 
aed->lengthAllocDescs = cpu_to_le32(0); - sptr = (obh)->b_data + nextoffset; - nextoffset = sizeof(struct allocExtDesc); + sptr = oepos.bh->b_data + epos.offset; + epos.offset = sizeof(struct allocExtDesc); - if (obh) + if (oepos.bh) { - aed = (struct allocExtDesc *)(obh)->b_data; + aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); } @@ -606,11 +603,11 @@ static void udf_table_free_blocks(struct super_block * sb, } } if (UDF_SB_UDFREV(sb) >= 0x0200) - udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, - nbloc.logicalBlockNum, sizeof(tag)); + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1, + epos.block.logicalBlockNum, sizeof(tag)); else - udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, - nbloc.logicalBlockNum, sizeof(tag)); + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 2, 1, + epos.block.logicalBlockNum, sizeof(tag)); switch (UDF_I_ALLOCTYPE(table)) { case ICBTAG_FLAG_AD_SHORT: @@ -619,7 +616,7 @@ static void udf_table_free_blocks(struct super_block * sb, sad->extLength = cpu_to_le32( EXT_NEXT_EXTENT_ALLOCDECS | sb->s_blocksize); - sad->extPosition = cpu_to_le32(nbloc.logicalBlockNum); + sad->extPosition = cpu_to_le32(epos.block.logicalBlockNum); break; } case ICBTAG_FLAG_AD_LONG: @@ -628,14 +625,14 @@ static void udf_table_free_blocks(struct super_block * sb, lad->extLength = cpu_to_le32( EXT_NEXT_EXTENT_ALLOCDECS | sb->s_blocksize); - lad->extLocation = cpu_to_lelb(nbloc); + lad->extLocation = cpu_to_lelb(epos.block); break; } } - if (obh) + if (oepos.bh) { - udf_update_tag(obh->b_data, loffset); - mark_buffer_dirty(obh); + udf_update_tag(oepos.bh->b_data, loffset); + mark_buffer_dirty(oepos.bh); } else mark_inode_dirty(table); @@ -643,26 +640,26 @@ static void udf_table_free_blocks(struct super_block * sb, if (elen) /* It's possible that stealing the block emptied the extent */ { - udf_write_aext(table, nbloc, &nextoffset, eloc, elen, nbh, 1); + udf_write_aext(table, &epos, eloc, elen, 1); 
- if (!nbh) + if (!epos.bh) { UDF_I_LENALLOC(table) += adsize; mark_inode_dirty(table); } else { - aed = (struct allocExtDesc *)nbh->b_data; + aed = (struct allocExtDesc *)epos.bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); - udf_update_tag(nbh->b_data, nextoffset); - mark_buffer_dirty(nbh); + udf_update_tag(epos.bh->b_data, epos.offset); + mark_buffer_dirty(epos.bh); } } } - udf_release_data(nbh); - udf_release_data(obh); + brelse(epos.bh); + brelse(oepos.bh); error_return: sb->s_dirt = 1; @@ -677,9 +674,9 @@ static int udf_table_prealloc_blocks(struct super_block * sb, { struct udf_sb_info *sbi = UDF_SB(sb); int alloc_count = 0; - uint32_t extoffset, elen, adsize; - kernel_lb_addr bloc, eloc; - struct buffer_head *bh; + uint32_t elen, adsize; + kernel_lb_addr eloc; + struct extent_position epos; int8_t etype = -1; if (first_block < 0 || first_block >= UDF_SB_PARTLEN(sb, partition)) @@ -693,14 +690,13 @@ static int udf_table_prealloc_blocks(struct super_block * sb, return 0; mutex_lock(&sbi->s_alloc_mutex); - extoffset = sizeof(struct unallocSpaceEntry); - bloc = UDF_I_LOCATION(table); - - bh = NULL; + epos.offset = sizeof(struct unallocSpaceEntry); + epos.block = UDF_I_LOCATION(table); + epos.bh = NULL; eloc.logicalBlockNum = 0xFFFFFFFF; while (first_block != eloc.logicalBlockNum && (etype = - udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) + udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { udf_debug("eloc=%d, elen=%d, first_block=%d\n", eloc.logicalBlockNum, elen, first_block); @@ -709,7 +705,7 @@ static int udf_table_prealloc_blocks(struct super_block * sb, if (first_block == eloc.logicalBlockNum) { - extoffset -= adsize; + epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); if (inode && DQUOT_PREALLOC_BLOCK(inode, alloc_count > block_count ? 
block_count : alloc_count)) @@ -719,15 +715,15 @@ static int udf_table_prealloc_blocks(struct super_block * sb, alloc_count = block_count; eloc.logicalBlockNum += alloc_count; elen -= (alloc_count << sb->s_blocksize_bits); - udf_write_aext(table, bloc, &extoffset, eloc, (etype << 30) | elen, bh, 1); + udf_write_aext(table, &epos, eloc, (etype << 30) | elen, 1); } else - udf_delete_aext(table, bloc, extoffset, eloc, (etype << 30) | elen, bh); + udf_delete_aext(table, epos, eloc, (etype << 30) | elen); } else alloc_count = 0; - udf_release_data(bh); + brelse(epos.bh); if (alloc_count && UDF_SB_LVIDBH(sb)) { @@ -747,9 +743,9 @@ static int udf_table_new_block(struct super_block * sb, struct udf_sb_info *sbi = UDF_SB(sb); uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; uint32_t newblock = 0, adsize; - uint32_t extoffset, goal_extoffset, elen, goal_elen = 0; - kernel_lb_addr bloc, goal_bloc, eloc, goal_eloc; - struct buffer_head *bh, *goal_bh; + uint32_t elen, goal_elen = 0; + kernel_lb_addr eloc, goal_eloc; + struct extent_position epos, goal_epos; int8_t etype; *err = -ENOSPC; @@ -770,14 +766,12 @@ static int udf_table_new_block(struct super_block * sb, We store the buffer_head, bloc, and extoffset of the current closest match and use that when we are done. 
*/ - - extoffset = sizeof(struct unallocSpaceEntry); - bloc = UDF_I_LOCATION(table); - - goal_bh = bh = NULL; + epos.offset = sizeof(struct unallocSpaceEntry); + epos.block = UDF_I_LOCATION(table); + epos.bh = goal_epos.bh = NULL; while (spread && (etype = - udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) + udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { if (goal >= eloc.logicalBlockNum) { @@ -793,24 +787,24 @@ static int udf_table_new_block(struct super_block * sb, if (nspread < spread) { spread = nspread; - if (goal_bh != bh) + if (goal_epos.bh != epos.bh) { - udf_release_data(goal_bh); - goal_bh = bh; - atomic_inc(&goal_bh->b_count); + brelse(goal_epos.bh); + goal_epos.bh = epos.bh; + get_bh(goal_epos.bh); } - goal_bloc = bloc; - goal_extoffset = extoffset - adsize; + goal_epos.block = epos.block; + goal_epos.offset = epos.offset - adsize; goal_eloc = eloc; goal_elen = (etype << 30) | elen; } } - udf_release_data(bh); + brelse(epos.bh); if (spread == 0xFFFFFFFF) { - udf_release_data(goal_bh); + brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); return 0; } @@ -826,17 +820,17 @@ static int udf_table_new_block(struct super_block * sb, if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) { - udf_release_data(goal_bh); + brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); *err = -EDQUOT; return 0; } if (goal_elen) - udf_write_aext(table, goal_bloc, &goal_extoffset, goal_eloc, goal_elen, goal_bh, 1); + udf_write_aext(table, &goal_epos, goal_eloc, goal_elen, 1); else - udf_delete_aext(table, goal_bloc, goal_extoffset, goal_eloc, goal_elen, goal_bh); - udf_release_data(goal_bh); + udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); + brelse(goal_epos.bh); if (UDF_SB_LVIDBH(sb)) { @@ -921,11 +915,14 @@ inline int udf_new_block(struct super_block * sb, struct inode * inode, uint16_t partition, uint32_t goal, int *err) { + int ret; + if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { - return udf_bitmap_new_block(sb, 
inode, + ret = udf_bitmap_new_block(sb, inode, UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, partition, goal, err); + return ret; } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) { diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 2391c9150c4..e45f86b5e7b 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -111,11 +111,13 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d uint16_t liu; uint8_t lfi; loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; - struct buffer_head * bh = NULL, * tmp, * bha[16]; - kernel_lb_addr bloc, eloc; - uint32_t extoffset, elen, offset; + struct buffer_head *tmp, *bha[16]; + kernel_lb_addr eloc; + uint32_t elen; + sector_t offset; int i, num; unsigned int dt_type; + struct extent_position epos = { NULL, 0, {0, 0}}; if (nf_pos >= size) return 0; @@ -127,23 +129,22 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh.sbh = fibh.ebh = NULL; else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2), - &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) + &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { - offset >>= dir->i_sb->s_blocksize_bits; block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) - extoffset -= sizeof(short_ad); + epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - extoffset -= sizeof(long_ad); + epos.offset -= sizeof(long_ad); } else offset = 0; if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { - udf_release_data(bh); + brelse(epos.bh); return -EIO; } @@ -171,7 +172,7 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d } else { - udf_release_data(bh); + brelse(epos.bh); return -ENOENT; } @@ -179,14 +180,14 @@ do_udf_readdir(struct inode * 
dir, struct file *filp, filldir_t filldir, void *d { filp->f_pos = nf_pos + 1; - fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); + fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); if (!fi) { if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 0; } @@ -244,9 +245,9 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) { if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 0; } } @@ -255,9 +256,9 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d filp->f_pos = nf_pos + 1; if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 0; } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index fe751a2a0e4..198caa33027 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -36,14 +36,14 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, if (!ad) { - udf_release_data(*bh); + brelse(*bh); *error = 1; return NULL; } if (*offset == dir->i_sb->s_blocksize) { - udf_release_data(*bh); + brelse(*bh); block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); if (!block) return NULL; @@ -57,7 +57,7 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, remainder = dir->i_sb->s_blocksize - loffset; memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder); - udf_release_data(*bh); + brelse(*bh); block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); if (!block) return NULL; @@ -75,9 +75,9 @@ struct fileIdentDesc * udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct 
udf_fileident_bh *fibh, struct fileIdentDesc *cfi, - kernel_lb_addr *bloc, uint32_t *extoffset, + struct extent_position *epos, kernel_lb_addr *eloc, uint32_t *elen, - uint32_t *offset, struct buffer_head **bh) + sector_t *offset) { struct fileIdentDesc *fi; int i, num, block; @@ -105,13 +105,11 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, if (fibh->eoffset == dir->i_sb->s_blocksize) { - int lextoffset = *extoffset; + int lextoffset = epos->offset; - if (udf_next_aext(dir, bloc, extoffset, eloc, elen, bh, 1) != + if (udf_next_aext(dir, epos, eloc, elen, 1) != (EXT_RECORDED_ALLOCATED >> 30)) - { return NULL; - } block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); @@ -120,9 +118,9 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) *offset = 0; else - *extoffset = lextoffset; + epos->offset = lextoffset; - udf_release_data(fibh->sbh); + brelse(fibh->sbh); if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) return NULL; fibh->soffset = fibh->eoffset = 0; @@ -151,7 +149,7 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, } else if (fibh->sbh != fibh->ebh) { - udf_release_data(fibh->sbh); + brelse(fibh->sbh); fibh->sbh = fibh->ebh; } @@ -169,13 +167,11 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, } else if (fibh->eoffset > dir->i_sb->s_blocksize) { - int lextoffset = *extoffset; + int lextoffset = epos->offset; - if (udf_next_aext(dir, bloc, extoffset, eloc, elen, bh, 1) != + if (udf_next_aext(dir, epos, eloc, elen, 1) != (EXT_RECORDED_ALLOCATED >> 30)) - { return NULL; - } block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); @@ -184,7 +180,7 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) *offset = 0; else - *extoffset = lextoffset; + epos->offset = lextoffset; fibh->soffset -= dir->i_sb->s_blocksize; fibh->eoffset -= dir->i_sb->s_blocksize; diff --git a/fs/udf/file.c b/fs/udf/file.c index 
40d5047defe..51b5764685e 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -36,6 +36,7 @@ #include <linux/smp_lock.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> +#include <linux/aio.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index 5887d78cde4..6ded93e7c44 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -21,7 +21,6 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <linux/smp_lock.h> static int udf_fsync_inode(struct inode *, int); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ae21a0e59e9..bf7de0bdbab 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -49,10 +49,10 @@ MODULE_LICENSE("GPL"); static mode_t udf_convert_permissions(struct fileEntry *); static int udf_update_inode(struct inode *, int); static void udf_fill_inode(struct inode *, struct buffer_head *); -static struct buffer_head *inode_getblk(struct inode *, long, int *, +static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, long *, int *); -static int8_t udf_insert_aext(struct inode *, kernel_lb_addr, int, - kernel_lb_addr, uint32_t, struct buffer_head *); +static int8_t udf_insert_aext(struct inode *, struct extent_position, + kernel_lb_addr, uint32_t); static void udf_split_extents(struct inode *, int *, int, int, kernel_long_ad [EXTENT_MERGE_SIZE], int *); static void udf_prealloc_extents(struct inode *, int, int, @@ -61,7 +61,7 @@ static void udf_merge_extents(struct inode *, kernel_long_ad [EXTENT_MERGE_SIZE], int *); static void udf_update_extents(struct inode *, kernel_long_ad [EXTENT_MERGE_SIZE], int, int, - kernel_lb_addr, uint32_t, struct buffer_head **); + struct extent_position *); static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); /* @@ -100,14 +100,23 @@ no_delete: clear_inode(inode); } +/* + * If we are going to release inode from memory, we discard preallocation and + * truncate last inode extent to proper length. 
We could use drop_inode() but + * it's called under inode_lock and thus we cannot mark inode dirty there. We + * use clear_inode() but we have to make sure to write inode as it's not written + * automatically. + */ void udf_clear_inode(struct inode *inode) { if (!(inode->i_sb->s_flags & MS_RDONLY)) { lock_kernel(); + /* Discard preallocation for directories, symlinks, etc. */ udf_discard_prealloc(inode); + udf_truncate_tail_extent(inode); unlock_kernel(); + write_inode_now(inode, 1); } - kfree(UDF_I_DATA(inode)); UDF_I_DATA(inode) = NULL; } @@ -194,10 +203,11 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int *err) { int newblock; - struct buffer_head *sbh = NULL, *dbh = NULL; - kernel_lb_addr bloc, eloc; - uint32_t elen, extoffset; + struct buffer_head *dbh = NULL; + kernel_lb_addr eloc; + uint32_t elen; uint8_t alloctype; + struct extent_position epos; struct udf_fileident_bh sfibh, dfibh; loff_t f_pos = udf_ext0_offset(inode) >> 2; @@ -237,16 +247,16 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int mark_buffer_dirty_inode(dbh, inode); sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; - sbh = sfibh.sbh = sfibh.ebh = NULL; + sfibh.sbh = sfibh.ebh = NULL; dfibh.soffset = dfibh.eoffset = 0; dfibh.sbh = dfibh.ebh = dbh; while ( (f_pos < size) ) { UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; - sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL, NULL, NULL); + sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL); if (!sfi) { - udf_release_data(dbh); + brelse(dbh); return NULL; } UDF_I_ALLOCTYPE(inode) = alloctype; @@ -258,7 +268,7 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int sfi->fileIdent + le16_to_cpu(sfi->lengthOfImpUse))) { UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; - udf_release_data(dbh); + 
brelse(dbh); return NULL; } } @@ -266,16 +276,17 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, UDF_I_LENALLOC(inode)); UDF_I_LENALLOC(inode) = 0; - bloc = UDF_I_LOCATION(inode); eloc.logicalBlockNum = *block; eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; elen = inode->i_size; UDF_I_LENEXTENTS(inode) = elen; - extoffset = udf_file_entry_alloc_offset(inode); - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &sbh, 0); + epos.bh = NULL; + epos.block = UDF_I_LOCATION(inode); + epos.offset = udf_file_entry_alloc_offset(inode); + udf_add_aext(inode, &epos, eloc, elen, 0); /* UniqueID stuff */ - udf_release_data(sbh); + brelse(epos.bh); mark_inode_dirty(inode); return dbh; } @@ -354,53 +365,153 @@ udf_getblk(struct inode *inode, long block, int create, int *err) return NULL; } -static struct buffer_head * inode_getblk(struct inode * inode, long block, +/* Extend the file by 'blocks' blocks, return the number of extents added */ +int udf_extend_file(struct inode *inode, struct extent_position *last_pos, + kernel_long_ad *last_ext, sector_t blocks) +{ + sector_t add; + int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + struct super_block *sb = inode->i_sb; + kernel_lb_addr prealloc_loc = {0, 0}; + int prealloc_len = 0; + + /* The previous extent is fake and we should not extend by anything + * - there's nothing to do... */ + if (!blocks && fake) + return 0; + /* Round the last extent up to a multiple of block size */ + if (last_ext->extLength & (sb->s_blocksize - 1)) { + last_ext->extLength = + (last_ext->extLength & UDF_EXTENT_FLAG_MASK) | + (((last_ext->extLength & UDF_EXTENT_LENGTH_MASK) + + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1)); + UDF_I_LENEXTENTS(inode) = + (UDF_I_LENEXTENTS(inode) + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); + } + /* Last extent are just preallocated blocks? 
*/ + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_ALLOCATED) { + /* Save the extent so that we can reattach it to the end */ + prealloc_loc = last_ext->extLocation; + prealloc_len = last_ext->extLength; + /* Mark the extent as a hole */ + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + last_ext->extLocation.logicalBlockNum = 0; + last_ext->extLocation.partitionReferenceNum = 0; + } + /* Can we merge with the previous extent? */ + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { + add = ((1<<30) - sb->s_blocksize - (last_ext->extLength & + UDF_EXTENT_LENGTH_MASK)) >> sb->s_blocksize_bits; + if (add > blocks) + add = blocks; + blocks -= add; + last_ext->extLength += add << sb->s_blocksize_bits; + } + + if (fake) { + udf_add_aext(inode, last_pos, last_ext->extLocation, + last_ext->extLength, 1); + count++; + } + else + udf_write_aext(inode, last_pos, last_ext->extLocation, last_ext->extLength, 1); + /* Managed to do everything necessary? */ + if (!blocks) + goto out; + + /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ + last_ext->extLocation.logicalBlockNum = 0; + last_ext->extLocation.partitionReferenceNum = 0; + add = (1 << (30-sb->s_blocksize_bits)) - 1; + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | (add << sb->s_blocksize_bits); + /* Create enough extents to cover the whole hole */ + while (blocks > add) { + blocks -= add; + if (udf_add_aext(inode, last_pos, last_ext->extLocation, + last_ext->extLength, 1) == -1) + return -1; + count++; + } + if (blocks) { + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | + (blocks << sb->s_blocksize_bits); + if (udf_add_aext(inode, last_pos, last_ext->extLocation, + last_ext->extLength, 1) == -1) + return -1; + count++; + } +out: + /* Do we have some preallocated blocks saved? 
*/ + if (prealloc_len) { + if (udf_add_aext(inode, last_pos, prealloc_loc, prealloc_len, 1) == -1) + return -1; + last_ext->extLocation = prealloc_loc; + last_ext->extLength = prealloc_len; + count++; + } + /* last_pos should point to the last written extent... */ + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + last_pos->offset -= sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + last_pos->offset -= sizeof(long_ad); + else + return -1; + return count; +} + +static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, int *err, long *phys, int *new) { - struct buffer_head *pbh = NULL, *cbh = NULL, *nbh = NULL, *result = NULL; + static sector_t last_block; + struct buffer_head *result = NULL; kernel_long_ad laarr[EXTENT_MERGE_SIZE]; - uint32_t pextoffset = 0, cextoffset = 0, nextoffset = 0; + struct extent_position prev_epos, cur_epos, next_epos; int count = 0, startnum = 0, endnum = 0; - uint32_t elen = 0; - kernel_lb_addr eloc, pbloc, cbloc, nbloc; + uint32_t elen = 0, tmpelen; + kernel_lb_addr eloc, tmpeloc; int c = 1; - uint64_t lbcount = 0, b_off = 0; - uint32_t newblocknum, newblock, offset = 0; + loff_t lbcount = 0, b_off = 0; + uint32_t newblocknum, newblock; + sector_t offset = 0; int8_t etype; int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum; - char lastblock = 0; + int lastblock = 0; - pextoffset = cextoffset = nextoffset = udf_file_entry_alloc_offset(inode); - b_off = (uint64_t)block << inode->i_sb->s_blocksize_bits; - pbloc = cbloc = nbloc = UDF_I_LOCATION(inode); + prev_epos.offset = udf_file_entry_alloc_offset(inode); + prev_epos.block = UDF_I_LOCATION(inode); + prev_epos.bh = NULL; + cur_epos = next_epos = prev_epos; + b_off = (loff_t)block << inode->i_sb->s_blocksize_bits; /* find the extent which contains the block we are looking for. 
alternate between laarr[0] and laarr[1] for locations of the current extent, and the previous extent */ do { - if (pbh != cbh) + if (prev_epos.bh != cur_epos.bh) { - udf_release_data(pbh); - atomic_inc(&cbh->b_count); - pbh = cbh; + brelse(prev_epos.bh); + get_bh(cur_epos.bh); + prev_epos.bh = cur_epos.bh; } - if (cbh != nbh) + if (cur_epos.bh != next_epos.bh) { - udf_release_data(cbh); - atomic_inc(&nbh->b_count); - cbh = nbh; + brelse(cur_epos.bh); + get_bh(next_epos.bh); + cur_epos.bh = next_epos.bh; } lbcount += elen; - pbloc = cbloc; - cbloc = nbloc; + prev_epos.block = cur_epos.block; + cur_epos.block = next_epos.block; - pextoffset = cextoffset; - cextoffset = nextoffset; + prev_epos.offset = cur_epos.offset; + cur_epos.offset = next_epos.offset; - if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) == -1) + if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1)) == -1) break; c = !c; @@ -418,6 +529,12 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, b_off -= lbcount; offset = b_off >> inode->i_sb->s_blocksize_bits; + /* + * Move prev_epos and cur_epos into indirect extent if we are at + * the pointer to it + */ + udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0); + udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0); /* if the extent is allocated and recorded, return the block if the extent is not a multiple of the blocksize, round up */ @@ -429,54 +546,77 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, elen = EXT_RECORDED_ALLOCATED | ((elen + inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize - 1)); - etype = udf_write_aext(inode, nbloc, &cextoffset, eloc, elen, nbh, 1); + etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1); } - udf_release_data(pbh); - udf_release_data(cbh); - udf_release_data(nbh); + brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset); *phys = newblock; 
return NULL; } + last_block = block; + /* Are we beyond EOF? */ if (etype == -1) { - endnum = startnum = ((count > 1) ? 1 : count); - if (laarr[c].extLength & (inode->i_sb->s_blocksize - 1)) - { - laarr[c].extLength = - (laarr[c].extLength & UDF_EXTENT_FLAG_MASK) | - (((laarr[c].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); - UDF_I_LENEXTENTS(inode) = - (UDF_I_LENEXTENTS(inode) + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1); + int ret; + + if (count) { + if (c) + laarr[0] = laarr[1]; + startnum = 1; + } + else { + /* Create a fake extent when there's not one */ + memset(&laarr[0].extLocation, 0x00, sizeof(kernel_lb_addr)); + laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; + /* Will udf_extend_file() create real extent from a fake one? */ + startnum = (offset > 0); + } + /* Create extents for the hole between EOF and offset */ + ret = udf_extend_file(inode, &prev_epos, laarr, offset); + if (ret == -1) { + brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); + /* We don't really know the error here so we just make + * something up */ + *err = -ENOSPC; + return NULL; } - c = !c; - laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - ((offset + 1) << inode->i_sb->s_blocksize_bits); - memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr)); - count ++; - endnum ++; + c = 0; + offset = 0; + count += ret; + /* We are not covered by a preallocated extent? */ + if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != EXT_NOT_RECORDED_ALLOCATED) { + /* Is there any real extent? - otherwise we overwrite + * the fake one... */ + if (count) + c = !c; + laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | + inode->i_sb->s_blocksize; + memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr)); + count ++; + endnum ++; + } + endnum = c+1; lastblock = 1; } - else + else { endnum = startnum = ((count > 2) ? 
2 : count); - /* if the current extent is in position 0, swap it with the previous */ - if (!c && count != 1) - { - laarr[2] = laarr[0]; - laarr[0] = laarr[1]; - laarr[1] = laarr[2]; - c = 1; - } + /* if the current extent is in position 0, swap it with the previous */ + if (!c && count != 1) + { + laarr[2] = laarr[0]; + laarr[0] = laarr[1]; + laarr[1] = laarr[2]; + c = 1; + } - /* if the current block is located in a extent, read the next extent */ - if (etype != -1) - { - if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 0)) != -1) + /* if the current block is located in an extent, read the next extent */ + if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0)) != -1) { laarr[c+1].extLength = (etype << 30) | elen; laarr[c+1].extLocation = eloc; @@ -484,11 +624,10 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, startnum ++; endnum ++; } - else + else { lastblock = 1; + } } - udf_release_data(cbh); - udf_release_data(nbh); /* if the current extent is not recorded but allocated, get the block in the extent corresponding to the requested block */ @@ -508,7 +647,7 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, if (!(newblocknum = udf_new_block(inode->i_sb, inode, UDF_I_LOCATION(inode).partitionReferenceNum, goal, err))) { - udf_release_data(pbh); + brelse(prev_epos.bh); *err = -ENOSPC; return NULL; } @@ -529,11 +668,11 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, udf_merge_extents(inode, laarr, &endnum); /* write back the new extents, inserting new extents if the new number - of extents is greater than the old number, and deleting extents if - the new number of extents is less than the old number */ - udf_update_extents(inode, laarr, startnum, endnum, pbloc, pextoffset, &pbh); + of extents is greater than the old number, and deleting extents if + the new number of extents is less than the old number */ + udf_update_extents(inode, laarr, 
startnum, endnum, &prev_epos); - udf_release_data(pbh); + brelse(prev_epos.bh); if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum, UDF_I_LOCATION(inode).partitionReferenceNum, 0))) @@ -795,7 +934,7 @@ static void udf_merge_extents(struct inode *inode, static void udf_update_extents(struct inode *inode, kernel_long_ad laarr[EXTENT_MERGE_SIZE], int startnum, int endnum, - kernel_lb_addr pbloc, uint32_t pextoffset, struct buffer_head **pbh) + struct extent_position *epos) { int start = 0, i; kernel_lb_addr tmploc; @@ -804,28 +943,26 @@ static void udf_update_extents(struct inode *inode, if (startnum > endnum) { for (i=0; i<(startnum-endnum); i++) - { - udf_delete_aext(inode, pbloc, pextoffset, laarr[i].extLocation, - laarr[i].extLength, *pbh); - } + udf_delete_aext(inode, *epos, laarr[i].extLocation, + laarr[i].extLength); } else if (startnum < endnum) { for (i=0; i<(endnum-startnum); i++) { - udf_insert_aext(inode, pbloc, pextoffset, laarr[i].extLocation, - laarr[i].extLength, *pbh); - udf_next_aext(inode, &pbloc, &pextoffset, &laarr[i].extLocation, - &laarr[i].extLength, pbh, 1); + udf_insert_aext(inode, *epos, laarr[i].extLocation, + laarr[i].extLength); + udf_next_aext(inode, epos, &laarr[i].extLocation, + &laarr[i].extLength, 1); start ++; } } for (i=start; i<endnum; i++) { - udf_next_aext(inode, &pbloc, &pextoffset, &tmploc, &tmplen, pbh, 0); - udf_write_aext(inode, pbloc, &pextoffset, laarr[i].extLocation, - laarr[i].extLength, *pbh, 1); + udf_next_aext(inode, epos, &tmploc, &tmplen, 0); + udf_write_aext(inode, epos, laarr[i].extLocation, + laarr[i].extLength, 1); } } @@ -931,7 +1068,7 @@ __udf_read_inode(struct inode *inode) { printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n", inode->i_ino, ident); - udf_release_data(bh); + brelse(bh); make_bad_inode(inode); return; } @@ -960,35 +1097,36 @@ __udf_read_inode(struct inode *inode) ident == TAG_IDENT_EFE) { memcpy(&UDF_I_LOCATION(inode), &loc, sizeof(kernel_lb_addr)); - udf_release_data(bh); 
- udf_release_data(ibh); - udf_release_data(nbh); + brelse(bh); + brelse(ibh); + brelse(nbh); __udf_read_inode(inode); return; } else { - udf_release_data(nbh); - udf_release_data(ibh); + brelse(nbh); + brelse(ibh); } } else - udf_release_data(ibh); + brelse(ibh); } } else - udf_release_data(ibh); + brelse(ibh); } else if (le16_to_cpu(fe->icbTag.strategyType) != 4) { printk(KERN_ERR "udf: unsupported strategy type: %d\n", le16_to_cpu(fe->icbTag.strategyType)); - udf_release_data(bh); + brelse(bh); make_bad_inode(inode); return; } udf_fill_inode(inode, bh); - udf_release_data(bh); + + brelse(bh); } static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) @@ -1331,7 +1469,7 @@ udf_update_inode(struct inode *inode, int do_sync) use->descTag.tagChecksum += ((uint8_t *)&(use->descTag))[i]; mark_buffer_dirty(bh); - udf_release_data(bh); + brelse(bh); return err; } @@ -1520,7 +1658,7 @@ udf_update_inode(struct inode *inode, int do_sync) err = -EIO; } } - udf_release_data(bh); + brelse(bh); return err; } @@ -1556,8 +1694,8 @@ udf_iget(struct super_block *sb, kernel_lb_addr ino) return NULL; } -int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, - kernel_lb_addr eloc, uint32_t elen, struct buffer_head **bh, int inc) +int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; short_ad *sad = NULL; @@ -1566,10 +1704,10 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, int8_t etype; uint8_t *ptr; - if (!*bh) - ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + if (!epos->bh) + ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); else - ptr = (*bh)->b_data + *extoffset; + ptr = epos->bh->b_data + epos->offset; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); @@ -1578,20 +1716,20 @@ int8_t udf_add_aext(struct 
inode *inode, kernel_lb_addr *bloc, int *extoffset, else return -1; - if (*extoffset + (2 * adsize) > inode->i_sb->s_blocksize) + if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) { char *sptr, *dptr; struct buffer_head *nbh; int err, loffset; - kernel_lb_addr obloc = *bloc; + kernel_lb_addr obloc = epos->block; - if (!(bloc->logicalBlockNum = udf_new_block(inode->i_sb, NULL, + if (!(epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, obloc.partitionReferenceNum, obloc.logicalBlockNum, &err))) { return -1; } if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, - *bloc, 0)))) + epos->block, 0)))) { return -1; } @@ -1604,25 +1742,25 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, aed = (struct allocExtDesc *)(nbh->b_data); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); - if (*extoffset + adsize > inode->i_sb->s_blocksize) + if (epos->offset + adsize > inode->i_sb->s_blocksize) { - loffset = *extoffset; + loffset = epos->offset; aed->lengthAllocDescs = cpu_to_le32(adsize); sptr = ptr - adsize; dptr = nbh->b_data + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); - *extoffset = sizeof(struct allocExtDesc) + adsize; + epos->offset = sizeof(struct allocExtDesc) + adsize; } else { - loffset = *extoffset + adsize; + loffset = epos->offset + adsize; aed->lengthAllocDescs = cpu_to_le32(0); sptr = ptr; - *extoffset = sizeof(struct allocExtDesc); + epos->offset = sizeof(struct allocExtDesc); - if (*bh) + if (epos->bh) { - aed = (struct allocExtDesc *)(*bh)->b_data; + aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); } @@ -1634,10 +1772,10 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, } if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, - bloc->logicalBlockNum, sizeof(tag)); + 
epos->block.logicalBlockNum, sizeof(tag)); else udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, - bloc->logicalBlockNum, sizeof(tag)); + epos->block.logicalBlockNum, sizeof(tag)); switch (UDF_I_ALLOCTYPE(inode)) { case ICBTAG_FLAG_AD_SHORT: @@ -1646,7 +1784,7 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, sad->extLength = cpu_to_le32( EXT_NEXT_EXTENT_ALLOCDECS | inode->i_sb->s_blocksize); - sad->extPosition = cpu_to_le32(bloc->logicalBlockNum); + sad->extPosition = cpu_to_le32(epos->block.logicalBlockNum); break; } case ICBTAG_FLAG_AD_LONG: @@ -1655,60 +1793,57 @@ int8_t udf_add_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, lad->extLength = cpu_to_le32( EXT_NEXT_EXTENT_ALLOCDECS | inode->i_sb->s_blocksize); - lad->extLocation = cpu_to_lelb(*bloc); + lad->extLocation = cpu_to_lelb(epos->block); memset(lad->impUse, 0x00, sizeof(lad->impUse)); break; } } - if (*bh) + if (epos->bh) { if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag((*bh)->b_data, loffset); + udf_update_tag(epos->bh->b_data, loffset); else - udf_update_tag((*bh)->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(*bh, inode); - udf_release_data(*bh); + udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(epos->bh, inode); + brelse(epos->bh); } else mark_inode_dirty(inode); - *bh = nbh; + epos->bh = nbh; } - etype = udf_write_aext(inode, *bloc, extoffset, eloc, elen, *bh, inc); + etype = udf_write_aext(inode, epos, eloc, elen, inc); - if (!*bh) + if (!epos->bh) { UDF_I_LENALLOC(inode) += adsize; mark_inode_dirty(inode); } else { - aed = (struct allocExtDesc *)(*bh)->b_data; + aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag((*bh)->b_data, *extoffset + (inc ? 
0 : adsize)); + udf_update_tag(epos->bh->b_data, epos->offset + (inc ? 0 : adsize)); else - udf_update_tag((*bh)->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(*bh, inode); + udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(epos->bh, inode); } return etype; } -int8_t udf_write_aext(struct inode *inode, kernel_lb_addr bloc, int *extoffset, - kernel_lb_addr eloc, uint32_t elen, struct buffer_head *bh, int inc) +int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; uint8_t *ptr; - if (!bh) - ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + if (!epos->bh) + ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); else - { - ptr = bh->b_data + *extoffset; - atomic_inc(&bh->b_count); - } + ptr = epos->bh->b_data + epos->offset; switch (UDF_I_ALLOCTYPE(inode)) { @@ -1733,40 +1868,39 @@ int8_t udf_write_aext(struct inode *inode, kernel_lb_addr bloc, int *extoffset, return -1; } - if (bh) + if (epos->bh) { if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) { - struct allocExtDesc *aed = (struct allocExtDesc *)(bh)->b_data; - udf_update_tag((bh)->b_data, + struct allocExtDesc *aed = (struct allocExtDesc *)epos->bh->b_data; + udf_update_tag(epos->bh->b_data, le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc)); } - mark_buffer_dirty_inode(bh, inode); - udf_release_data(bh); + mark_buffer_dirty_inode(epos->bh, inode); } else mark_inode_dirty(inode); if (inc) - *extoffset += adsize; + epos->offset += adsize; return (elen >> 30); } -int8_t udf_next_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, - kernel_lb_addr *eloc, uint32_t *elen, struct buffer_head **bh, int inc) +int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr *eloc, uint32_t *elen, 
int inc) { int8_t etype; - while ((etype = udf_current_aext(inode, bloc, extoffset, eloc, elen, bh, inc)) == + while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { - *bloc = *eloc; - *extoffset = sizeof(struct allocExtDesc); - udf_release_data(*bh); - if (!(*bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, *bloc, 0)))) + epos->block = *eloc; + epos->offset = sizeof(struct allocExtDesc); + brelse(epos->bh); + if (!(epos->bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, epos->block, 0)))) { udf_debug("reading block %d failed!\n", - udf_get_lb_pblock(inode->i_sb, *bloc, 0)); + udf_get_lb_pblock(inode->i_sb, epos->block, 0)); return -1; } } @@ -1774,26 +1908,26 @@ int8_t udf_next_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, return etype; } -int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffset, - kernel_lb_addr *eloc, uint32_t *elen, struct buffer_head **bh, int inc) +int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, + kernel_lb_addr *eloc, uint32_t *elen, int inc) { int alen; int8_t etype; uint8_t *ptr; - if (!*bh) + if (!epos->bh) { - if (!(*extoffset)) - *extoffset = udf_file_entry_alloc_offset(inode); - ptr = UDF_I_DATA(inode) + *extoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + if (!epos->offset) + epos->offset = udf_file_entry_alloc_offset(inode); + ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); alen = udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode); } else { - if (!(*extoffset)) - *extoffset = sizeof(struct allocExtDesc); - ptr = (*bh)->b_data + *extoffset; - alen = sizeof(struct allocExtDesc) + le32_to_cpu(((struct allocExtDesc *)(*bh)->b_data)->lengthAllocDescs); + if (!epos->offset) + epos->offset = sizeof(struct allocExtDesc); + ptr = epos->bh->b_data + epos->offset; + alen = sizeof(struct allocExtDesc) + le32_to_cpu(((struct 
allocExtDesc *)epos->bh->b_data)->lengthAllocDescs); } switch (UDF_I_ALLOCTYPE(inode)) @@ -1802,7 +1936,7 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse { short_ad *sad; - if (!(sad = udf_get_fileshortad(ptr, alen, extoffset, inc))) + if (!(sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc))) return -1; etype = le32_to_cpu(sad->extLength) >> 30; @@ -1815,7 +1949,7 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse { long_ad *lad; - if (!(lad = udf_get_filelongad(ptr, alen, extoffset, inc))) + if (!(lad = udf_get_filelongad(ptr, alen, &epos->offset, inc))) return -1; etype = le32_to_cpu(lad->extLength) >> 30; @@ -1834,41 +1968,40 @@ int8_t udf_current_aext(struct inode *inode, kernel_lb_addr *bloc, int *extoffse } static int8_t -udf_insert_aext(struct inode *inode, kernel_lb_addr bloc, int extoffset, - kernel_lb_addr neloc, uint32_t nelen, struct buffer_head *bh) +udf_insert_aext(struct inode *inode, struct extent_position epos, + kernel_lb_addr neloc, uint32_t nelen) { kernel_lb_addr oeloc; uint32_t oelen; int8_t etype; - if (bh) - atomic_inc(&bh->b_count); + if (epos.bh) + get_bh(epos.bh); - while ((etype = udf_next_aext(inode, &bloc, &extoffset, &oeloc, &oelen, &bh, 0)) != -1) + while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); + udf_write_aext(inode, &epos, neloc, nelen, 1); neloc = oeloc; nelen = (etype << 30) | oelen; } - udf_add_aext(inode, &bloc, &extoffset, neloc, nelen, &bh, 1); - udf_release_data(bh); + udf_add_aext(inode, &epos, neloc, nelen, 1); + brelse(epos.bh); return (nelen >> 30); } -int8_t udf_delete_aext(struct inode *inode, kernel_lb_addr nbloc, int nextoffset, - kernel_lb_addr eloc, uint32_t elen, struct buffer_head *nbh) +int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, + kernel_lb_addr eloc, uint32_t elen) { - struct buffer_head *obh; - kernel_lb_addr obloc; 
- int oextoffset, adsize; + struct extent_position oepos; + int adsize; int8_t etype; struct allocExtDesc *aed; - if (nbh) + if (epos.bh) { - atomic_inc(&nbh->b_count); - atomic_inc(&nbh->b_count); + get_bh(epos.bh); + get_bh(epos.bh); } if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) @@ -1878,80 +2011,77 @@ int8_t udf_delete_aext(struct inode *inode, kernel_lb_addr nbloc, int nextoffset else adsize = 0; - obh = nbh; - obloc = nbloc; - oextoffset = nextoffset; - - if (udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1) == -1) + oepos = epos; + if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1) return -1; - while ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) != -1) + while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { - udf_write_aext(inode, obloc, &oextoffset, eloc, (etype << 30) | elen, obh, 1); - if (obh != nbh) + udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1); + if (oepos.bh != epos.bh) { - obloc = nbloc; - udf_release_data(obh); - atomic_inc(&nbh->b_count); - obh = nbh; - oextoffset = nextoffset - adsize; + oepos.block = epos.block; + brelse(oepos.bh); + get_bh(epos.bh); + oepos.bh = epos.bh; + oepos.offset = epos.offset - adsize; } } memset(&eloc, 0x00, sizeof(kernel_lb_addr)); elen = 0; - if (nbh != obh) + if (epos.bh != oepos.bh) { - udf_free_blocks(inode->i_sb, inode, nbloc, 0, 1); - udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); - udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); - if (!obh) + udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1); + udf_write_aext(inode, &oepos, eloc, elen, 1); + udf_write_aext(inode, &oepos, eloc, elen, 1); + if (!oepos.bh) { UDF_I_LENALLOC(inode) -= (adsize * 2); mark_inode_dirty(inode); } else { - aed = (struct allocExtDesc *)(obh)->b_data; + aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2*adsize)); if (!UDF_QUERY_FLAG(inode->i_sb, 
UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag((obh)->b_data, oextoffset - (2*adsize)); + udf_update_tag(oepos.bh->b_data, oepos.offset - (2*adsize)); else - udf_update_tag((obh)->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(obh, inode); + udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(oepos.bh, inode); } } else { - udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); - if (!obh) + udf_write_aext(inode, &oepos, eloc, elen, 1); + if (!oepos.bh) { UDF_I_LENALLOC(inode) -= adsize; mark_inode_dirty(inode); } else { - aed = (struct allocExtDesc *)(obh)->b_data; + aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag((obh)->b_data, oextoffset - adsize); + udf_update_tag(oepos.bh->b_data, epos.offset - adsize); else - udf_update_tag((obh)->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(obh, inode); + udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(oepos.bh, inode); } } - udf_release_data(nbh); - udf_release_data(obh); + brelse(epos.bh); + brelse(oepos.bh); return (elen >> 30); } -int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t *extoffset, - kernel_lb_addr *eloc, uint32_t *elen, uint32_t *offset, struct buffer_head **bh) +int8_t inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos, + kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset) { - uint64_t lbcount = 0, bcount = (uint64_t)block << inode->i_sb->s_blocksize_bits; + loff_t lbcount = 0, bcount = (loff_t)block << inode->i_sb->s_blocksize_bits; int8_t etype; if (block < 0) @@ -1960,42 +2090,44 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t return -1; } - *extoffset = 0; + pos->offset = 0; + 
pos->block = UDF_I_LOCATION(inode); + pos->bh = NULL; *elen = 0; - *bloc = UDF_I_LOCATION(inode); do { - if ((etype = udf_next_aext(inode, bloc, extoffset, eloc, elen, bh, 1)) == -1) + if ((etype = udf_next_aext(inode, pos, eloc, elen, 1)) == -1) { - *offset = bcount - lbcount; + *offset = (bcount - lbcount) >> inode->i_sb->s_blocksize_bits; UDF_I_LENEXTENTS(inode) = lbcount; return -1; } lbcount += *elen; } while (lbcount <= bcount); - *offset = bcount + *elen - lbcount; + *offset = (bcount + *elen - lbcount) >> inode->i_sb->s_blocksize_bits; return etype; } -long udf_block_map(struct inode *inode, long block) +long udf_block_map(struct inode *inode, sector_t block) { - kernel_lb_addr eloc, bloc; - uint32_t offset, extoffset, elen; - struct buffer_head *bh = NULL; + kernel_lb_addr eloc; + uint32_t elen; + sector_t offset; + struct extent_position epos = { NULL, 0, { 0, 0}}; int ret; lock_kernel(); - if (inode_bmap(inode, block, &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) - ret = udf_get_lb_pblock(inode->i_sb, eloc, offset >> inode->i_sb->s_blocksize_bits); + if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) + ret = udf_get_lb_pblock(inode->i_sb, eloc, offset); else ret = 0; unlock_kernel(); - udf_release_data(bh); + brelse(epos.bh); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV)) return udf_fixed_to_variable(ret); diff --git a/fs/udf/misc.c b/fs/udf/misc.c index cc8ca3254db..a2b2a98ce78 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -274,12 +274,6 @@ udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, uint32_t offset, ui loc.logicalBlockNum + offset, ident); } -void udf_release_data(struct buffer_head *bh) -{ - if (bh) - brelse(bh); -} - void udf_update_tag(char *data, int length) { tag *tptr = (tag *)data; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index fe361cd19a9..51fe307dc0e 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -30,6 +30,7 @@ #include 
<linux/quotaops.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/sched.h> static inline int udf_match(int len1, const char *name1, int len2, const char *name2) { @@ -155,9 +156,10 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, uint8_t lfi; uint16_t liu; loff_t size; - kernel_lb_addr bloc, eloc; - uint32_t extoffset, elen, offset; - struct buffer_head *bh = NULL; + kernel_lb_addr eloc; + uint32_t elen; + sector_t offset; + struct extent_position epos = { NULL, 0, { 0, 0}}; size = (udf_ext0_offset(dir) + dir->i_size) >> 2; f_pos = (udf_ext0_offset(dir) >> 2); @@ -166,42 +168,41 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) + &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { - offset >>= dir->i_sb->s_blocksize_bits; block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) - extoffset -= sizeof(short_ad); + epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - extoffset -= sizeof(long_ad); + epos.offset -= sizeof(long_ad); } else offset = 0; if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { - udf_release_data(bh); + brelse(epos.bh); return NULL; } } else { - udf_release_data(bh); + brelse(epos.bh); return NULL; } while ( (f_pos < size) ) { - fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); + fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); if (!fi) { if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); - udf_release_data(bh); + brelse(fibh->ebh); + brelse(fibh->sbh); + brelse(epos.bh); return NULL; } @@ 
-247,15 +248,15 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, { if (udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) { - udf_release_data(bh); + brelse(epos.bh); return fi; } } } if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); - udf_release_data(bh); + brelse(fibh->ebh); + brelse(fibh->sbh); + brelse(epos.bh); return NULL; } @@ -321,8 +322,8 @@ udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (udf_find_entry(dir, dentry, &fibh, &cfi)) { if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation)); if ( !inode ) @@ -353,9 +354,10 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, uint8_t lfi; uint16_t liu; int block; - kernel_lb_addr bloc, eloc; - uint32_t extoffset, elen, offset; - struct buffer_head *bh = NULL; + kernel_lb_addr eloc; + uint32_t elen; + sector_t offset; + struct extent_position epos = { NULL, 0, { 0, 0 }}; sb = dir->i_sb; @@ -384,23 +386,22 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) + &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { - offset >>= dir->i_sb->s_blocksize_bits; block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) - extoffset -= sizeof(short_ad); + epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - extoffset -= sizeof(long_ad); + epos.offset -= sizeof(long_ad); } else offset = 0; if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { - udf_release_data(bh); + brelse(epos.bh); *err = -EIO; return 
NULL; } @@ -418,14 +419,14 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, while ( (f_pos < size) ) { - fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); + fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); if (!fi) { if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); - udf_release_data(bh); + brelse(fibh->ebh); + brelse(fibh->sbh); + brelse(epos.bh); *err = -EIO; return NULL; } @@ -455,7 +456,7 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, { if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen) { - udf_release_data(bh); + brelse(epos.bh); cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; @@ -478,9 +479,9 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) { if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); - udf_release_data(bh); + brelse(fibh->ebh); + brelse(fibh->sbh); + brelse(epos.bh); *err = -EEXIST; return NULL; } @@ -492,25 +493,25 @@ add: if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB && sb->s_blocksize - fibh->eoffset < nfidlen) { - udf_release_data(bh); - bh = NULL; + brelse(epos.bh); + epos.bh = NULL; fibh->soffset -= udf_ext0_offset(dir); fibh->eoffset -= udf_ext0_offset(dir); f_pos -= (udf_ext0_offset(dir) >> 2); if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); + brelse(fibh->ebh); + brelse(fibh->sbh); if (!(fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err))) return NULL; - bloc = UDF_I_LOCATION(dir); + epos.block = UDF_I_LOCATION(dir); eloc.logicalBlockNum = block; eloc.partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; elen = dir->i_sb->s_blocksize; - extoffset = udf_file_entry_alloc_offset(dir); + epos.offset = udf_file_entry_alloc_offset(dir); if 
(UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) - extoffset += sizeof(short_ad); + epos.offset += sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - extoffset += sizeof(long_ad); + epos.offset += sizeof(long_ad); } if (sb->s_blocksize - fibh->eoffset >= nfidlen) @@ -519,7 +520,7 @@ add: fibh->eoffset += nfidlen; if (fibh->sbh != fibh->ebh) { - udf_release_data(fibh->sbh); + brelse(fibh->sbh); fibh->sbh = fibh->ebh; } @@ -541,7 +542,7 @@ add: fibh->eoffset += nfidlen - sb->s_blocksize; if (fibh->sbh != fibh->ebh) { - udf_release_data(fibh->sbh); + brelse(fibh->sbh); fibh->sbh = fibh->ebh; } @@ -550,14 +551,14 @@ add: if (!(fibh->ebh = udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 1, err))) { - udf_release_data(bh); - udf_release_data(fibh->sbh); + brelse(epos.bh); + brelse(fibh->sbh); return NULL; } if (!(fibh->soffset)) { - if (udf_next_aext(dir, &bloc, &extoffset, &eloc, &elen, &bh, 1) == + if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == (EXT_RECORDED_ALLOCATED >> 30)) { block = eloc.logicalBlockNum + ((elen - 1) >> @@ -566,7 +567,7 @@ add: else block ++; - udf_release_data(fibh->sbh); + brelse(fibh->sbh); fibh->sbh = fibh->ebh; fi = (struct fileIdentDesc *)(fibh->sbh->b_data); } @@ -587,7 +588,7 @@ add: cfi->lengthOfImpUse = cpu_to_le16(0); if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { - udf_release_data(bh); + brelse(epos.bh); dir->i_size += nfidlen; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) UDF_I_LENALLOC(dir) += nfidlen; @@ -596,10 +597,10 @@ add: } else { - udf_release_data(bh); + brelse(epos.bh); if (fibh->sbh != fibh->ebh) - udf_release_data(fibh->ebh); - udf_release_data(fibh->sbh); + brelse(fibh->ebh); + brelse(fibh->sbh); *err = -EIO; return NULL; } @@ -656,8 +657,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); 
unlock_kernel(); d_instantiate(dentry, inode); return 0; @@ -701,8 +702,8 @@ static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t mark_inode_dirty(inode); if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); d_instantiate(dentry, inode); err = 0; out: @@ -743,7 +744,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL); cfi.fileCharacteristics = FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); - udf_release_data(fibh.sbh); + brelse(fibh.sbh); inode->i_mode = S_IFDIR | mode; if (dir->i_mode & S_ISGID) inode->i_mode |= S_ISGID; @@ -766,8 +767,8 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) mark_inode_dirty(dir); d_instantiate(dentry, inode); if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); err = 0; out: unlock_kernel(); @@ -781,9 +782,10 @@ static int empty_dir(struct inode *dir) loff_t f_pos; loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; int block; - kernel_lb_addr bloc, eloc; - uint32_t extoffset, elen, offset; - struct buffer_head *bh = NULL; + kernel_lb_addr eloc; + uint32_t elen; + sector_t offset; + struct extent_position epos = { NULL, 0, { 0, 0}}; f_pos = (udf_ext0_offset(dir) >> 2); @@ -792,59 +794,58 @@ static int empty_dir(struct inode *dir) if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh.sbh = fibh.ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &bloc, &extoffset, &eloc, &elen, &offset, &bh) == (EXT_RECORDED_ALLOCATED >> 30)) + &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { - offset >>= dir->i_sb->s_blocksize_bits; block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == 
ICBTAG_FLAG_AD_SHORT) - extoffset -= sizeof(short_ad); + epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) - extoffset -= sizeof(long_ad); + epos.offset -= sizeof(long_ad); } else offset = 0; if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { - udf_release_data(bh); + brelse(epos.bh); return 0; } } else { - udf_release_data(bh); + brelse(epos.bh); return 0; } while ( (f_pos < size) ) { - fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &bloc, &extoffset, &eloc, &elen, &offset, &bh); + fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); if (!fi) { if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 0; } if (cfi.lengthFileIdent && (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0) { if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 0; } } if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); - udf_release_data(bh); + brelse(fibh.ebh); + brelse(fibh.sbh); + brelse(epos.bh); return 1; } @@ -878,14 +879,14 @@ static int udf_rmdir(struct inode * dir, struct dentry * dentry) inode->i_nlink); clear_nlink(inode); inode->i_size = 0; - inode_dec_link_count(inode); + inode_dec_link_count(dir); inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); mark_inode_dirty(dir); end_rmdir: if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); out: unlock_kernel(); return retval; @@ -928,8 +929,8 @@ static int udf_unlink(struct inode * dir, struct dentry * dentry) end_unlink: if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); out: unlock_kernel(); return retval; @@ 
-941,7 +942,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * struct pathComponent *pc; char *compstart; struct udf_fileident_bh fibh; - struct buffer_head *bh = NULL; + struct extent_position epos = { NULL, 0, {0, 0}}; int eoffset, elen = 0; struct fileIdentDesc *fi; struct fileIdentDesc cfi; @@ -961,33 +962,33 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB) { - struct buffer_head *bh = NULL; - kernel_lb_addr bloc, eloc; - uint32_t elen, extoffset; + kernel_lb_addr eloc; + uint32_t elen; block = udf_new_block(inode->i_sb, inode, UDF_I_LOCATION(inode).partitionReferenceNum, UDF_I_LOCATION(inode).logicalBlockNum, &err); if (!block) goto out_no_entry; - bloc = UDF_I_LOCATION(inode); + epos.block = UDF_I_LOCATION(inode); + epos.offset = udf_file_entry_alloc_offset(inode); + epos.bh = NULL; eloc.logicalBlockNum = block; eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; elen = inode->i_sb->s_blocksize; UDF_I_LENEXTENTS(inode) = elen; - extoffset = udf_file_entry_alloc_offset(inode); - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 0); - udf_release_data(bh); + udf_add_aext(inode, &epos, eloc, elen, 0); + brelse(epos.bh); block = udf_get_pblock(inode->i_sb, block, UDF_I_LOCATION(inode).partitionReferenceNum, 0); - bh = udf_tread(inode->i_sb, block); - lock_buffer(bh); - memset(bh->b_data, 0x00, inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - unlock_buffer(bh); - mark_buffer_dirty_inode(bh, inode); - ea = bh->b_data + udf_ext0_offset(inode); + epos.bh = udf_tread(inode->i_sb, block); + lock_buffer(epos.bh); + memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); + set_buffer_uptodate(epos.bh); + unlock_buffer(epos.bh); + mark_buffer_dirty_inode(epos.bh, inode); + ea = epos.bh->b_data + udf_ext0_offset(inode); } else ea = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); @@ -1060,7 +1061,7 @@ static int 
udf_symlink(struct inode * dir, struct dentry * dentry, const char * } } - udf_release_data(bh); + brelse(epos.bh); inode->i_size = elen; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) UDF_I_LENALLOC(inode) = inode->i_size; @@ -1089,8 +1090,8 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); d_instantiate(dentry, inode); err = 0; @@ -1145,8 +1146,8 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir, mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) - udf_release_data(fibh.ebh); - udf_release_data(fibh.sbh); + brelse(fibh.ebh); + brelse(fibh.sbh); inc_nlink(inode); inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); @@ -1174,8 +1175,8 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi))) { if (ofibh.sbh != ofibh.ebh) - udf_release_data(ofibh.ebh); - udf_release_data(ofibh.sbh); + brelse(ofibh.ebh); + brelse(ofibh.sbh); } tloc = lelb_to_cpu(ocfi.icb.extLocation); if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0) @@ -1188,8 +1189,8 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, if (!new_inode) { if (nfibh.sbh != nfibh.ebh) - udf_release_data(nfibh.ebh); - udf_release_data(nfibh.sbh); + brelse(nfibh.ebh); + brelse(nfibh.sbh); nfi = NULL; } } @@ -1290,19 +1291,19 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, if (ofi) { if (ofibh.sbh != ofibh.ebh) - udf_release_data(ofibh.ebh); - udf_release_data(ofibh.sbh); + brelse(ofibh.ebh); + brelse(ofibh.sbh); } retval = 0; end_rename: - udf_release_data(dir_bh); + brelse(dir_bh); if (nfi) { if (nfibh.sbh != nfibh.ebh) - udf_release_data(nfibh.ebh); - udf_release_data(nfibh.sbh); + brelse(nfibh.ebh); + brelse(nfibh.sbh); } unlock_kernel(); return retval; 
diff --git a/fs/udf/partition.c b/fs/udf/partition.c index dabf2b841db..467a26171cd 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -81,7 +81,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, uint16_t loc = le32_to_cpu(((__le32 *)bh->b_data)[index]); - udf_release_data(bh); + brelse(bh); if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 023b304fdd9..6658afb41cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -134,10 +134,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct udf_inode_info *ei = (struct udf_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - ei->i_ext.i_data = NULL; - inode_init_once(&ei->vfs_inode); - } + ei->i_ext.i_data = NULL; + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) @@ -563,7 +561,7 @@ udf_vrs(struct super_block *sb, int silent) if (vsd->stdIdent[0] == 0) { - udf_release_data(bh); + brelse(bh); break; } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) @@ -596,7 +594,7 @@ udf_vrs(struct super_block *sb, int silent) } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN)) { - udf_release_data(bh); + brelse(bh); break; } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) @@ -607,7 +605,7 @@ udf_vrs(struct super_block *sb, int silent) { nsr03 = sector; } - udf_release_data(bh); + brelse(bh); } if (nsr03) @@ -673,7 +671,7 @@ udf_find_anchor(struct super_block *sb) { ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - udf_release_data(bh); + brelse(bh); } if (ident == TAG_IDENT_AVDP) @@ -708,7 +706,7 @@ udf_find_anchor(struct super_block *sb) { ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - udf_release_data(bh); + brelse(bh); } if (ident == TAG_IDENT_AVDP && @@ -727,7 +725,7 @@ 
udf_find_anchor(struct super_block *sb) { ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - udf_release_data(bh); + brelse(bh); } if (ident == TAG_IDENT_AVDP && @@ -749,7 +747,7 @@ udf_find_anchor(struct super_block *sb) { ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); - udf_release_data(bh); + brelse(bh); if (ident == TAG_IDENT_AVDP && location == 256) UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); @@ -766,7 +764,7 @@ udf_find_anchor(struct super_block *sb) } else { - udf_release_data(bh); + brelse(bh); if ((ident != TAG_IDENT_AVDP) && (i || (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE))) { @@ -795,7 +793,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr return 1; else if (ident != TAG_IDENT_FSD) { - udf_release_data(bh); + brelse(bh); return 1; } @@ -834,7 +832,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr newfileset.logicalBlockNum += 1 + ((le32_to_cpu(sp->numOfBytes) + sizeof(struct spaceBitmapDesc) - 1) >> sb->s_blocksize_bits); - udf_release_data(bh); + brelse(bh); break; } case TAG_IDENT_FSD: @@ -845,7 +843,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr default: { newfileset.logicalBlockNum ++; - udf_release_data(bh); + brelse(bh); bh = NULL; break; } @@ -865,7 +863,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum; udf_load_fileset(sb, bh, root); - udf_release_data(bh); + brelse(bh); return 0; } return 1; @@ -1083,7 +1081,7 @@ udf_load_logicalvol(struct super_block *sb, struct buffer_head * bh, kernel_lb_a if (ident != 0 || strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING))) { - udf_release_data(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]); + brelse(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]); UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = 
NULL; } } @@ -1137,12 +1135,12 @@ udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) udf_load_logicalvolint(sb, leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt)); if (UDF_SB_LVIDBH(sb) != bh) - udf_release_data(bh); + brelse(bh); loc.extLength -= sb->s_blocksize; loc.extLocation ++; } if (UDF_SB_LVIDBH(sb) != bh) - udf_release_data(bh); + brelse(bh); } /* @@ -1245,7 +1243,7 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_ done = 1; break; } - udf_release_data(bh); + brelse(bh); } for (i=0; i<VDS_POS_LENGTH; i++) { @@ -1267,10 +1265,10 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_ gd = (struct generic_desc *)bh2->b_data; if (ident == TAG_IDENT_PD) udf_load_partdesc(sb, bh2); - udf_release_data(bh2); + brelse(bh2); } } - udf_release_data(bh); + brelse(bh); } } @@ -1333,7 +1331,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) reserve_e = reserve_e >> sb->s_blocksize_bits; reserve_e += reserve_s; - udf_release_data(bh); + brelse(bh); /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ @@ -1353,7 +1351,7 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) for (i=0; i<UDF_SB_NUMPARTS(sb); i++) { - switch UDF_SB_PARTTYPE(sb, i) + switch (UDF_SB_PARTTYPE(sb, i)) { case UDF_VIRTUAL_MAP15: case UDF_VIRTUAL_MAP20: @@ -1403,12 +1401,14 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) pos = udf_block_map(UDF_SB_VAT(sb), 0); bh = sb_bread(sb, pos); + if (!bh) + return 1; UDF_SB_TYPEVIRT(sb,i).s_start_offset = le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data + udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) + udf_ext0_offset(UDF_SB_VAT(sb)); UDF_SB_TYPEVIRT(sb,i).s_num_entries = (UDF_SB_VAT(sb)->i_size - UDF_SB_TYPEVIRT(sb,i).s_start_offset) >> 2; - udf_release_data(bh); + brelse(bh); } UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0); UDF_SB_PARTLEN(sb,i) = 
UDF_SB_PARTLEN(sb,ino.partitionReferenceNum); @@ -1661,7 +1661,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) iput(inode); goto error_out; } - sb->s_maxbytes = 1<<30; + sb->s_maxbytes = MAX_LFS_FILESIZE; return 0; error_out: @@ -1680,7 +1680,7 @@ error_out: if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) { for (i=0; i<4; i++) - udf_release_data(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); + brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); } } #ifdef CONFIG_UDF_NLS @@ -1689,7 +1689,7 @@ error_out: #endif if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); - udf_release_data(UDF_SB_LVIDBH(sb)); + brelse(UDF_SB_LVIDBH(sb)); UDF_SB_FREE(sb); kfree(sbi); sb->s_fs_info = NULL; @@ -1758,7 +1758,7 @@ udf_put_super(struct super_block *sb) if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) { for (i=0; i<4; i++) - udf_release_data(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); + brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); } } #ifdef CONFIG_UDF_NLS @@ -1767,7 +1767,7 @@ udf_put_super(struct super_block *sb) #endif if (!(sb->s_flags & MS_RDONLY)) udf_close_lvid(sb); - udf_release_data(UDF_SB_LVIDBH(sb)); + brelse(UDF_SB_LVIDBH(sb)); UDF_SB_FREE(sb); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -1837,7 +1837,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) } else if (ident != TAG_IDENT_SBD) { - udf_release_data(bh); + brelse(bh); printk(KERN_ERR "udf: udf_count_free failed\n"); goto out; } @@ -1859,7 +1859,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) } if ( bytes ) { - udf_release_data(bh); + brelse(bh); newblock = udf_get_lb_pblock(sb, loc, ++block); bh = udf_tread(sb, newblock); if (!bh) @@ -1871,7 +1871,7 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) ptr = (uint8_t *)bh->b_data; } } - udf_release_data(bh); + brelse(bh); out: unlock_kernel(); @@ 
-1883,21 +1883,20 @@ static unsigned int udf_count_free_table(struct super_block *sb, struct inode * table) { unsigned int accum = 0; - uint32_t extoffset, elen; - kernel_lb_addr bloc, eloc; + uint32_t elen; + kernel_lb_addr eloc; int8_t etype; - struct buffer_head *bh = NULL; + struct extent_position epos; lock_kernel(); - bloc = UDF_I_LOCATION(table); - extoffset = sizeof(struct unallocSpaceEntry); + epos.block = UDF_I_LOCATION(table); + epos.offset = sizeof(struct unallocSpaceEntry); + epos.bh = NULL; - while ((etype = udf_next_aext(table, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) - { + while ((etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) accum += (elen >> table->i_sb->s_blocksize_bits); - } - udf_release_data(bh); + brelse(epos.bh); unlock_kernel(); diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index ba068a78656..12613b680cc 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -95,7 +95,7 @@ static int udf_symlink_filler(struct file *file, struct page *page) } udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); - udf_release_data(bh); + brelse(bh); unlock_kernel(); SetPageUptodate(page); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 0abd66ce36e..60d27764424 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -28,8 +28,8 @@ #include "udf_i.h" #include "udf_sb.h" -static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffset, - kernel_lb_addr eloc, int8_t etype, uint32_t elen, struct buffer_head *bh, uint32_t nelen) +static void extent_trunc(struct inode * inode, struct extent_position *epos, + kernel_lb_addr eloc, int8_t etype, uint32_t elen, uint32_t nelen) { kernel_lb_addr neloc = { 0, 0 }; int last_block = (elen + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; @@ -49,7 +49,7 @@ static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffse if (elen != nelen) { - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 0); + udf_write_aext(inode, epos, 
neloc, nelen, 0); if (last_block - first_block > 0) { if (etype == (EXT_RECORDED_ALLOCATED >> 30)) @@ -61,74 +61,125 @@ static void extent_trunc(struct inode * inode, kernel_lb_addr bloc, int extoffse } } -void udf_discard_prealloc(struct inode * inode) +/* + * Truncate the last extent to match i_size. This function assumes + * that preallocation extent is already truncated. + */ +void udf_truncate_tail_extent(struct inode *inode) { - kernel_lb_addr bloc, eloc; - uint32_t extoffset = 0, elen, nelen; + struct extent_position epos = { NULL, 0, {0, 0}}; + kernel_lb_addr eloc; + uint32_t elen, nelen; uint64_t lbcount = 0; int8_t etype = -1, netype; - struct buffer_head *bh = NULL; int adsize; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == UDF_I_LENEXTENTS(inode)) - { + inode->i_size == UDF_I_LENEXTENTS(inode)) + return; + /* Are we going to delete the file anyway? */ + if (inode->i_nlink == 0) return; - } if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else - adsize = 0; + BUG(); - bloc = UDF_I_LOCATION(inode); - - while ((netype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1)) != -1) + /* Find the last extent in the file */ + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { etype = netype; lbcount += elen; - if (lbcount > inode->i_size && lbcount - inode->i_size < inode->i_sb->s_blocksize) - { + if (lbcount > inode->i_size) { + if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) + printk(KERN_WARNING + "udf_truncate_tail_extent(): Too long " + "extent after EOF in inode %u: i_size: " + "%Ld lbcount: %Ld extent %u+%u\n", + (unsigned)inode->i_ino, + (long long)inode->i_size, + (long long)lbcount, + (unsigned)eloc.logicalBlockNum, + (unsigned)elen); nelen = elen - (lbcount - inode->i_size); - extent_trunc(inode, bloc, extoffset-adsize, eloc, etype, elen, bh, nelen); - lbcount = 
inode->i_size; + epos.offset -= adsize; + extent_trunc(inode, &epos, eloc, etype, elen, nelen); + epos.offset += adsize; + if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) + printk(KERN_ERR "udf_truncate_tail_extent(): " + "Extent after EOF in inode %u.\n", + (unsigned)inode->i_ino); + break; } } - if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - extoffset -= adsize; + /* This inode entry is in-memory only and thus we don't have to mark + * the inode dirty */ + UDF_I_LENEXTENTS(inode) = inode->i_size; + brelse(epos.bh); +} + +void udf_discard_prealloc(struct inode *inode) +{ + struct extent_position epos = { NULL, 0, {0, 0}}; + kernel_lb_addr eloc; + uint32_t elen; + uint64_t lbcount = 0; + int8_t etype = -1, netype; + int adsize; + + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || + inode->i_size == UDF_I_LENEXTENTS(inode)) + return; + + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) + adsize = sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else + adsize = 0; + + epos.block = UDF_I_LOCATION(inode); + + /* Find the last extent in the file */ + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + etype = netype; + lbcount += elen; + } + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + epos.offset -= adsize; lbcount -= elen; - extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, 0); - if (!bh) - { - UDF_I_LENALLOC(inode) = extoffset - udf_file_entry_alloc_offset(inode); + extent_trunc(inode, &epos, eloc, etype, elen, 0); + if (!epos.bh) { + UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode); mark_inode_dirty(inode); - } - else - { - struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); - aed->lengthAllocDescs = cpu_to_le32(extoffset - sizeof(struct allocExtDesc)); + } else { + struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); + aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct 
allocExtDesc)); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(bh->b_data, extoffset); + udf_update_tag(epos.bh->b_data, epos.offset); else - udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(bh, inode); + udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(epos.bh, inode); } } + /* This inode entry is in-memory only and thus we don't have to mark + * the inode dirty */ UDF_I_LENEXTENTS(inode) = lbcount; - - udf_release_data(bh); + brelse(epos.bh); } void udf_truncate_extents(struct inode * inode) { - kernel_lb_addr bloc, eloc, neloc = { 0, 0 }; - uint32_t extoffset, elen, offset, nelen = 0, lelen = 0, lenalloc; + struct extent_position epos; + kernel_lb_addr eloc, neloc = { 0, 0 }; + uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; int8_t etype; - int first_block = inode->i_size >> inode->i_sb->s_blocksize_bits; - struct buffer_head *bh = NULL; + struct super_block *sb = inode->i_sb; + sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset; + loff_t byte_offset; int adsize; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) @@ -136,158 +187,130 @@ void udf_truncate_extents(struct inode * inode) else if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); else - adsize = 0; + BUG(); - etype = inode_bmap(inode, first_block, &bloc, &extoffset, &eloc, &elen, &offset, &bh); - offset += (inode->i_size & (inode->i_sb->s_blocksize - 1)); + etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + byte_offset = (offset << sb->s_blocksize_bits) + (inode->i_size & (sb->s_blocksize-1)); if (etype != -1) { - extoffset -= adsize; - extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, offset); - extoffset += adsize; - - if (offset) - lenalloc = extoffset; + epos.offset -= adsize; + extent_trunc(inode, &epos, eloc, etype, elen, byte_offset); + epos.offset += adsize; + if (byte_offset) + 
lenalloc = epos.offset; else - lenalloc = extoffset - adsize; + lenalloc = epos.offset - adsize; - if (!bh) + if (!epos.bh) lenalloc -= udf_file_entry_alloc_offset(inode); else lenalloc -= sizeof(struct allocExtDesc); - while ((etype = udf_current_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 0)) != -1) + while ((etype = udf_current_aext(inode, &epos, &eloc, &elen, 0)) != -1) { if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 0); - extoffset = 0; - if (lelen) + udf_write_aext(inode, &epos, neloc, nelen, 0); + if (indirect_ext_len) { - if (!bh) + /* We managed to free all extents in the + * indirect extent - free it too */ + if (!epos.bh) BUG(); - else - memset(bh->b_data, 0x00, sizeof(struct allocExtDesc)); - udf_free_blocks(inode->i_sb, inode, bloc, 0, lelen); + udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); } else { - if (!bh) + if (!epos.bh) { UDF_I_LENALLOC(inode) = lenalloc; mark_inode_dirty(inode); } else { - struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); + struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(bh->b_data, lenalloc + + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201) + udf_update_tag(epos.bh->b_data, lenalloc + sizeof(struct allocExtDesc)); else - udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(bh, inode); + udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(epos.bh, inode); } } - - udf_release_data(bh); - extoffset = sizeof(struct allocExtDesc); - bloc = eloc; - bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, bloc, 0)); + brelse(epos.bh); + epos.offset = sizeof(struct allocExtDesc); + epos.block = eloc; + epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, eloc, 0)); if (elen) - 
lelen = (elen + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + indirect_ext_len = (elen + + sb->s_blocksize - 1) >> + sb->s_blocksize_bits; else - lelen = 1; + indirect_ext_len = 1; } else { - extent_trunc(inode, bloc, extoffset, eloc, etype, elen, bh, 0); - extoffset += adsize; + extent_trunc(inode, &epos, eloc, etype, elen, 0); + epos.offset += adsize; } } - if (lelen) + if (indirect_ext_len) { - if (!bh) + if (!epos.bh) BUG(); - else - memset(bh->b_data, 0x00, sizeof(struct allocExtDesc)); - udf_free_blocks(inode->i_sb, inode, bloc, 0, lelen); + udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); } else { - if (!bh) + if (!epos.bh) { UDF_I_LENALLOC(inode) = lenalloc; mark_inode_dirty(inode); } else { - struct allocExtDesc *aed = (struct allocExtDesc *)(bh->b_data); + struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(bh->b_data, lenalloc + + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201) + udf_update_tag(epos.bh->b_data, lenalloc + sizeof(struct allocExtDesc)); else - udf_update_tag(bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(bh, inode); + udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); + mark_buffer_dirty_inode(epos.bh, inode); } } } else if (inode->i_size) { - if (offset) + if (byte_offset) { + kernel_long_ad extent; + /* * OK, there is not extent covering inode->i_size and * no extent above inode->i_size => truncate is - * extending the file by 'offset'. + * extending the file by 'offset' blocks. */ - if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) || - (bh && extoffset == sizeof(struct allocExtDesc))) { - /* File has no extents at all! 
*/ - memset(&eloc, 0x00, sizeof(kernel_lb_addr)); - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); + if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || + (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { + /* File has no extents at all or has empty last + * indirect extent! Create a fake extent... */ + extent.extLocation.logicalBlockNum = 0; + extent.extLocation.partitionReferenceNum = 0; + extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; } else { - extoffset -= adsize; - etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); - if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - { - extoffset -= adsize; - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); - } - else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - kernel_lb_addr neloc = { 0, 0 }; - extoffset -= adsize; - nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | - ((elen + offset + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); - udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); - } - else - { - if (elen & (inode->i_sb->s_blocksize - 1)) - { - extoffset -= adsize; - elen = EXT_RECORDED_ALLOCATED | - ((elen + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); - } - memset(&eloc, 0x00, sizeof(kernel_lb_addr)); - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); - } + epos.offset -= adsize; + etype = udf_next_aext(inode, &epos, + &extent.extLocation, &extent.extLength, 0); + extent.extLength |= etype << 30; } + udf_extend_file(inode, &epos, &extent, offset+((inode->i_size & (sb->s_blocksize-1)) != 0)); } } UDF_I_LENEXTENTS(inode) = inode->i_size; - udf_release_data(bh); + brelse(epos.bh); } 
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 110f8d62616..3b2e6c8cb15 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -93,7 +93,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb) for (i=0; i<nr_groups; i++)\ {\ if (UDF_SB_BITMAP(X,Y,Z,i))\ - udf_release_data(UDF_SB_BITMAP(X,Y,Z,i));\ + brelse(UDF_SB_BITMAP(X,Y,Z,i));\ }\ if (size <= PAGE_SIZE)\ kfree(UDF_SB_PARTMAPS(X)[Y].Z.s_bitmap);\ diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index ee1dece1f6f..f581f2f69c0 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -77,6 +77,13 @@ struct ustr uint8_t u_len; }; +struct extent_position { + struct buffer_head *bh; + uint32_t offset; + kernel_lb_addr block; +}; + + /* super.c */ extern void udf_error(struct super_block *, const char *, const char *, ...); extern void udf_warning(struct super_block *, const char *, const char *, ...); @@ -98,13 +105,14 @@ extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); extern int udf_write_inode(struct inode *, int); -extern long udf_block_map(struct inode *, long); -extern int8_t inode_bmap(struct inode *, int, kernel_lb_addr *, uint32_t *, kernel_lb_addr *, uint32_t *, uint32_t *, struct buffer_head **); -extern int8_t udf_add_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr, uint32_t, struct buffer_head **, int); -extern int8_t udf_write_aext(struct inode *, kernel_lb_addr, int *, kernel_lb_addr, uint32_t, struct buffer_head *, int); -extern int8_t udf_delete_aext(struct inode *, kernel_lb_addr, int, kernel_lb_addr, uint32_t, struct buffer_head *); -extern int8_t udf_next_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr *, uint32_t *, struct buffer_head **, int); -extern int8_t udf_current_aext(struct inode *, kernel_lb_addr *, int *, kernel_lb_addr *, uint32_t *, struct buffer_head **, int); +extern long udf_block_map(struct inode *, sector_t); +extern int udf_extend_file(struct inode *, 
struct extent_position *, kernel_long_ad *, sector_t); +extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *); +extern int8_t udf_add_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int); +extern int8_t udf_write_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int); +extern int8_t udf_delete_aext(struct inode *, struct extent_position, kernel_lb_addr, uint32_t); +extern int8_t udf_next_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int); +extern int8_t udf_current_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int); /* misc.c */ extern struct buffer_head *udf_tgetblk(struct super_block *, int); @@ -113,7 +121,6 @@ extern struct genericFormat *udf_add_extendedattr(struct inode *, uint32_t, uint extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, uint8_t); extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, uint32_t, uint16_t *); extern struct buffer_head *udf_read_ptagged(struct super_block *, kernel_lb_addr, uint32_t, uint16_t *); -extern void udf_release_data(struct buffer_head *); extern void udf_update_tag(char *, int); extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); @@ -139,6 +146,7 @@ extern void udf_free_inode(struct inode *); extern struct inode * udf_new_inode (struct inode *, int, int *); /* truncate.c */ +extern void udf_truncate_tail_extent(struct inode *); extern void udf_discard_prealloc(struct inode *); extern void udf_truncate_extents(struct inode *); @@ -151,7 +159,7 @@ extern int udf_new_block(struct super_block *, struct inode *, uint16_t, uint32_ extern int udf_fsync_file(struct file *, struct dentry *, int); /* directory.c */ -extern struct fileIdentDesc * udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, struct fileIdentDesc *, kernel_lb_addr *, uint32_t *, kernel_lb_addr *, 
uint32_t *, uint32_t *, struct buffer_head **); +extern struct fileIdentDesc * udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, struct fileIdentDesc *, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *); extern struct fileIdentDesc * udf_get_fileident(void * buffer, int bufsize, int * offset); extern long_ad * udf_get_filelongad(uint8_t *, int, int *, int); extern short_ad * udf_get_fileshortad(uint8_t *, int, int *, int); diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 4fb8b2e077e..154452172f4 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -19,7 +19,6 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/ufs_fs.h> -#include <linux/smp_lock.h> #include "swab.h" #include "util.h" diff --git a/fs/ufs/super.c b/fs/ufs/super.c index be7c48c5f20..22ff6ed55ce 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1237,8 +1237,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/utimes.c b/fs/utimes.c index 99cf2cb11fe..480f7c8c29d 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -1,8 +1,10 @@ #include <linux/compiler.h> +#include <linux/file.h> #include <linux/fs.h> #include <linux/linkage.h> #include <linux/namei.h> #include <linux/sched.h> +#include <linux/stat.h> #include <linux/utime.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -20,54 +22,18 @@ * must be owner or have write permission. * Else, update from *times, must be owner or super user. 
*/ -asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) +asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) { - int error; - struct nameidata nd; - struct inode * inode; - struct iattr newattrs; + struct timespec tv[2]; - error = user_path_walk(filename, &nd); - if (error) - goto out; - inode = nd.dentry->d_inode; - - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; - - /* Don't worry, the checks are done in inode_change_ok() */ - newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { - error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - goto dput_and_out; - - error = get_user(newattrs.ia_atime.tv_sec, &times->actime); - newattrs.ia_atime.tv_nsec = 0; - if (!error) - error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime); - newattrs.ia_mtime.tv_nsec = 0; - if (error) - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; - } else { - error = -EACCES; - if (IS_IMMUTABLE(inode)) - goto dput_and_out; - - if (current->fsuid != inode->i_uid && - (error = vfs_permission(&nd, MAY_WRITE)) != 0) - goto dput_and_out; + if (get_user(tv[0].tv_sec, &times->actime) || + get_user(tv[1].tv_sec, &times->modtime)) + return -EFAULT; + tv[0].tv_nsec = 0; + tv[1].tv_nsec = 0; } - mutex_lock(&inode->i_mutex); - error = notify_change(nd.dentry, &newattrs); - mutex_unlock(&inode->i_mutex); -dput_and_out: - path_release(&nd); -out: - return error; + return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0); } #endif @@ -76,18 +42,38 @@ out: * must be owner or have write permission. * Else, update from *times, must be owner or super user. 
*/ -long do_utimes(int dfd, char __user *filename, struct timeval *times) +long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags) { int error; struct nameidata nd; - struct inode * inode; + struct dentry *dentry; + struct inode *inode; struct iattr newattrs; + struct file *f = NULL; - error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); - - if (error) + error = -EINVAL; + if (flags & ~AT_SYMLINK_NOFOLLOW) goto out; - inode = nd.dentry->d_inode; + + if (filename == NULL && dfd != AT_FDCWD) { + error = -EINVAL; + if (flags & AT_SYMLINK_NOFOLLOW) + goto out; + + error = -EBADF; + f = fget(dfd); + if (!f) + goto out; + dentry = f->f_path.dentry; + } else { + error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); + if (error) + goto out; + + dentry = nd.dentry; + } + + inode = dentry->d_inode; error = -EROFS; if (IS_RDONLY(inode)) @@ -100,11 +86,21 @@ long do_utimes(int dfd, char __user *filename, struct timeval *times) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) goto dput_and_out; - newattrs.ia_atime.tv_sec = times[0].tv_sec; - newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; - newattrs.ia_mtime.tv_sec = times[1].tv_sec; - newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; + if (times[0].tv_nsec == UTIME_OMIT) + newattrs.ia_valid &= ~ATTR_ATIME; + else if (times[0].tv_nsec != UTIME_NOW) { + newattrs.ia_atime.tv_sec = times[0].tv_sec; + newattrs.ia_atime.tv_nsec = times[0].tv_nsec; + newattrs.ia_valid |= ATTR_ATIME_SET; + } + + if (times[1].tv_nsec == UTIME_OMIT) + newattrs.ia_valid &= ~ATTR_MTIME; + else if (times[1].tv_nsec != UTIME_NOW) { + newattrs.ia_mtime.tv_sec = times[1].tv_sec; + newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; + newattrs.ia_valid |= ATTR_MTIME_SET; + } } else { error = -EACCES; if (IS_IMMUTABLE(inode)) @@ -115,21 +111,67 @@ long do_utimes(int dfd, char __user *filename, struct timeval *times) goto dput_and_out; } 
mutex_lock(&inode->i_mutex); - error = notify_change(nd.dentry, &newattrs); + error = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); dput_and_out: - path_release(&nd); + if (f) + fput(f); + else + path_release(&nd); out: return error; } +asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags) +{ + struct timespec tstimes[2]; + + if (utimes) { + if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) + return -EFAULT; + if ((tstimes[0].tv_nsec == UTIME_OMIT || + tstimes[0].tv_nsec == UTIME_NOW) && + tstimes[0].tv_sec != 0) + return -EINVAL; + if ((tstimes[1].tv_nsec == UTIME_OMIT || + tstimes[1].tv_nsec == UTIME_NOW) && + tstimes[1].tv_sec != 0) + return -EINVAL; + + /* Nothing to do, we must not even check the path. */ + if (tstimes[0].tv_nsec == UTIME_OMIT && + tstimes[1].tv_nsec == UTIME_OMIT) + return 0; + } + + return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags); +} + asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) { struct timeval times[2]; + struct timespec tstimes[2]; + + if (utimes) { + if (copy_from_user(&times, utimes, sizeof(times))) + return -EFAULT; + + /* This test is needed to catch all invalid values. If we + would test only in do_utimes we would miss those invalid + values truncated by the multiplication with 1000. Note + that we also catch UTIME_{NOW,OMIT} here which are only + valid for utimensat. */ + if (times[0].tv_usec >= 1000000 || times[0].tv_usec < 0 || + times[1].tv_usec >= 1000000 || times[1].tv_usec < 0) + return -EINVAL; + + tstimes[0].tv_sec = times[0].tv_sec; + tstimes[0].tv_nsec = 1000 * times[0].tv_usec; + tstimes[1].tv_sec = times[1].tv_sec; + tstimes[1].tv_nsec = 1000 * times[1].tv_usec; + } - if (utimes && copy_from_user(&times, utimes, sizeof(times))) - return -EFAULT; - return do_utimes(dfd, filename, utimes ? times : NULL); + return do_utimes(dfd, filename, utimes ? 
tstimes : NULL, 0); } asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) diff --git a/fs/xattr.c b/fs/xattr.c index 38646132ab0..4523aca7965 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -9,7 +9,6 @@ */ #include <linux/fs.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/file.h> #include <linux/xattr.h> #include <linux/namei.h> @@ -351,6 +350,7 @@ sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) f = fget(fd); if (!f) return error; + audit_inode(NULL, f->f_path.dentry->d_inode); error = getxattr(f->f_path.dentry, name, value, size); fput(f); return error; @@ -423,6 +423,7 @@ sys_flistxattr(int fd, char __user *list, size_t size) f = fget(fd); if (!f) return error; + audit_inode(NULL, f->f_path.dentry->d_inode); error = listxattr(f->f_path.dentry, list, size); fput(f); return error; diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h index af168a1a98c..c110bb00266 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/linux-2.6/mrlock.h @@ -43,6 +43,18 @@ static inline void mrupdate(mrlock_t *mrp) mrp->mr_writer = 1; } +static inline void mraccess_nested(mrlock_t *mrp, int subclass) +{ + down_read_nested(&mrp->mr_lock, subclass); +} + +static inline void mrupdate_nested(mrlock_t *mrp, int subclass) +{ + down_write_nested(&mrp->mr_lock, subclass); + mrp->mr_writer = 1; +} + + static inline int mrtryaccess(mrlock_t *mrp) { return down_read_trylock(&mrp->mr_lock); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 143ffc851c9..7361861e3aa 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -141,9 +141,46 @@ xfs_destroy_ioend( } /* + * Update on-disk file size now that data has been written to disk. + * The current in-memory file size is i_size. If a write is beyond + * eof io_new_size will be the intended file size until i_size is + * updated. 
If this write does not extend all the way to the valid + * file size then restrict this update to the end of the write. + */ +STATIC void +xfs_setfilesize( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip; + xfs_fsize_t isize; + xfs_fsize_t bsize; + + ip = xfs_vtoi(ioend->io_vnode); + + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + ASSERT(ioend->io_type != IOMAP_READ); + + if (unlikely(ioend->io_error)) + return; + + bsize = ioend->io_offset + ioend->io_size; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + isize = MAX(ip->i_size, ip->i_iocore.io_new_size); + isize = MIN(isize, bsize); + + if (ip->i_d.di_size < isize) { + ip->i_d.di_size = isize; + ip->i_update_core = 1; + ip->i_update_size = 1; + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); +} + +/* * Buffered IO write completion for delayed allocate extents. - * TODO: Update ondisk isize now that we know the file data - * has been flushed (i.e. the notorious "NULL file" problem). */ STATIC void xfs_end_bio_delalloc( @@ -152,6 +189,7 @@ xfs_end_bio_delalloc( xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + xfs_setfilesize(ioend); xfs_destroy_ioend(ioend); } @@ -165,6 +203,7 @@ xfs_end_bio_written( xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + xfs_setfilesize(ioend); xfs_destroy_ioend(ioend); } @@ -184,8 +223,23 @@ xfs_end_bio_unwritten( xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; - if (likely(!ioend->io_error)) + if (likely(!ioend->io_error)) { bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL); + xfs_setfilesize(ioend); + } + xfs_destroy_ioend(ioend); +} + +/* + * IO read completion for regular, written extents. 
+ */ +STATIC void +xfs_end_bio_read( + struct work_struct *work) +{ + xfs_ioend_t *ioend = + container_of(work, xfs_ioend_t, io_work); + xfs_destroy_ioend(ioend); } @@ -224,6 +278,8 @@ xfs_alloc_ioend( INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten); else if (type == IOMAP_DELAY) INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc); + else if (type == IOMAP_READ) + INIT_WORK(&ioend->io_work, xfs_end_bio_read); else INIT_WORK(&ioend->io_work, xfs_end_bio_written); @@ -645,7 +701,7 @@ xfs_is_delayed_page( else if (buffer_delay(bh)) acceptable = (type == IOMAP_DELAY); else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable = (type == 0); + acceptable = (type == IOMAP_NEW); else break; } while ((bh = bh->b_this_page) != head); @@ -754,7 +810,7 @@ xfs_convert_page( page_dirty--; count++; } else { - type = 0; + type = IOMAP_NEW; if (buffer_mapped(bh) && all_bh && startio) { lock_buffer(bh); xfs_add_to_ioend(inode, bh, offset, @@ -912,8 +968,8 @@ xfs_page_state_convert( bh = head = page_buffers(page); offset = page_offset(page); - flags = -1; - type = 0; + flags = BMAPI_READ; + type = IOMAP_NEW; /* TODO: cleanup count and page_dirty */ @@ -943,14 +999,14 @@ xfs_page_state_convert( * * Third case, an unmapped buffer was found, and we are * in a path where we need to write the whole page out. - */ + */ if (buffer_unwritten(bh) || buffer_delay(bh) || ((buffer_uptodate(bh) || PageUptodate(page)) && !buffer_mapped(bh) && (unmapped || startio))) { - /* + /* * Make sure we don't use a read-only iomap */ - if (flags == BMAPI_READ) + if (flags == BMAPI_READ) iomap_valid = 0; if (buffer_unwritten(bh)) { @@ -999,7 +1055,7 @@ xfs_page_state_convert( * That means it must already have extents allocated * underneath it. Map the extent by reading it. 
*/ - if (!iomap_valid || type != 0) { + if (!iomap_valid || flags != BMAPI_READ) { flags = BMAPI_READ; size = xfs_probe_cluster(inode, page, bh, head, 1); @@ -1010,7 +1066,15 @@ xfs_page_state_convert( iomap_valid = xfs_iomap_valid(&iomap, offset); } - type = 0; + /* + * We set the type to IOMAP_NEW in case we are doing a + * small write at EOF that is extending the file but + * without needing an allocation. We need to update the + * file size on I/O completion in this case so it is + * the same case as having just allocated a new extent + * that we are writing into for the first time. + */ + type = IOMAP_NEW; if (!test_and_set_bit(BH_Lock, &bh->b_state)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) @@ -1356,12 +1420,21 @@ xfs_end_io_direct( * completion handler in the future, in which case all this can * go away. */ - if (private && size > 0) { - ioend->io_offset = offset; - ioend->io_size = size; + ioend->io_offset = offset; + ioend->io_size = size; + if (ioend->io_type == IOMAP_READ) { + xfs_finish_ioend(ioend); + } else if (private && size > 0) { xfs_finish_ioend(ioend); } else { - xfs_destroy_ioend(ioend); + /* + * A direct I/O write ioend starts it's life in unwritten + * state in case they map an unwritten extent. This write + * didn't map an unwritten extent so switch it's completion + * handler. 
+ */ + INIT_WORK(&ioend->io_work, xfs_end_bio_written); + xfs_finish_ioend(ioend); } /* @@ -1392,15 +1465,15 @@ xfs_vm_direct_IO( if (error) return -error; - iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); - if (rw == WRITE) { + iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); ret = blockdev_direct_IO_own_locking(rw, iocb, inode, iomap.iomap_target->bt_bdev, iov, offset, nr_segs, xfs_get_blocks_direct, xfs_end_io_direct); } else { + iocb->private = xfs_alloc_ioend(inode, IOMAP_READ); ret = blockdev_direct_IO_no_locking(rw, iocb, inode, iomap.iomap_target->bt_bdev, iov, offset, nr_segs, diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 69e9e80735d..fe4f66a5af1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1426,7 +1426,7 @@ xfs_free_bufhash( /* * buftarg list for delwrite queue processing */ -LIST_HEAD(xfs_buftarg_list); +static LIST_HEAD(xfs_buftarg_list); static DEFINE_SPINLOCK(xfs_buftarg_lock); STATIC void @@ -1867,3 +1867,11 @@ xfs_buf_terminate(void) ktrace_free(xfs_buf_trace_buf); #endif } + +#ifdef CONFIG_KDB_MODULES +struct list_head * +xfs_get_buftarg_list(void) +{ + return &xfs_buftarg_list; +} +#endif diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9e8ef8fef39..b6241f6201a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -411,6 +411,9 @@ extern void xfs_free_buftarg(xfs_buftarg_t *, int); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); +#ifdef CONFIG_KDB_MODULES +extern struct list_head *xfs_get_buftarg_list(void); +#endif #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index dc0562828e7..2eb87cd082a 100644 --- 
a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -35,7 +35,7 @@ fs_tosspages( truncate_inode_pages(ip->i_mapping, first); } -void +int fs_flushinval_pages( bhv_desc_t *bdp, xfs_off_t first, @@ -44,13 +44,16 @@ fs_flushinval_pages( { bhv_vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); + int ret = 0; if (VN_CACHED(vp)) { if (VN_TRUNC(vp)) VUNTRUNCATE(vp); - filemap_write_and_wait(ip->i_mapping); - truncate_inode_pages(ip->i_mapping, first); + ret = filemap_write_and_wait(ip->i_mapping); + if (!ret) + truncate_inode_pages(ip->i_mapping, first); } + return ret; } int @@ -63,14 +66,18 @@ fs_flush_pages( { bhv_vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); + int ret = 0; + int ret2; if (VN_DIRTY(vp)) { if (VN_TRUNC(vp)) VUNTRUNCATE(vp); - filemap_fdatawrite(ip->i_mapping); + ret = filemap_fdatawrite(ip->i_mapping); if (flags & XFS_B_ASYNC) - return 0; - filemap_fdatawait(ip->i_mapping); + return ret; + ret2 = filemap_fdatawait(ip->i_mapping); + if (!ret) + ret = ret2; } - return 0; + return ret; } diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h index aee9ccdd18f..c1b53118a30 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.h +++ b/fs/xfs/linux-2.6/xfs_fs_subr.h @@ -23,7 +23,7 @@ extern int fs_noerr(void); extern int fs_nosys(void); extern void fs_noval(void); extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); -extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern int fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); #endif /* __XFS_FS_SUBR_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index ff8d64eba9f..ed90403f0ee 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -159,7 +159,7 @@ xfs_iozero( if (status) goto unlock; - memclear_highpage_flush(page, offset, bytes); + zero_user_page(page, offset, 
bytes, KM_USER0); status = mapping->a_ops->commit_write(NULL, page, offset, offset + bytes); @@ -191,7 +191,7 @@ xfs_read( struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t size = 0; - ssize_t ret; + ssize_t ret = 0; xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; @@ -224,7 +224,7 @@ xfs_read( mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->bt_smask) || (size & target->bt_smask)) { - if (*offset == ip->i_d.di_size) { + if (*offset == ip->i_size) { return (0); } return -XFS_ERROR(EINVAL); @@ -263,9 +263,13 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) - bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), + ret = bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); + if (ret) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; + } } xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, @@ -383,9 +387,10 @@ xfs_splice_write( { xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_mount_t *mp = ip->i_mount; + xfs_iocore_t *io = &ip->i_iocore; ssize_t ret; struct inode *inode = outfilp->f_mapping->host; - xfs_fsize_t isize; + xfs_fsize_t isize, new_size; XFS_STATS_INC(xs_write_calls); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -406,6 +411,14 @@ xfs_splice_write( return -error; } } + + new_size = *ppos + count; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (new_size > ip->i_size) + io->io_new_size = new_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, pipe, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); @@ -416,14 +429,18 @@ xfs_splice_write( if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) *ppos = isize; - if (*ppos > ip->i_d.di_size) { + if (*ppos > ip->i_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); - if (*ppos > ip->i_d.di_size) { - ip->i_d.di_size = *ppos; - i_size_write(inode, *ppos); - ip->i_update_core = 1; - ip->i_update_size = 1; - } + if (*ppos > 
ip->i_size) + ip->i_size = *ppos; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + + if (io->io_new_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + io->io_new_size = 0; + if (ip->i_d.di_size > ip->i_size) + ip->i_d.di_size = ip->i_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); @@ -639,37 +656,21 @@ xfs_write( xfs_fsize_t isize, new_size; xfs_iocore_t *io; bhv_vnode_t *vp; - unsigned long seg; int iolock; int eventsent = 0; bhv_vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; - int need_i_mutex = 1, need_flush = 0; + int need_i_mutex; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); - for (seg = 0; seg < segs; seg++) { - const struct iovec *iv = &iovp[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - ocount += iv->iov_len; - if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - segs = seg; - ocount -= iv->iov_len; /* This segment is no good */ - break; - } + error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); + if (error) + return error; count = ocount; pos = *offset; @@ -685,39 +686,20 @@ xfs_write( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (ioflags & IO_ISDIRECT) { - xfs_buftarg_t *target = - (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
- mp->m_rtdev_targp : mp->m_ddev_targp; - - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return XFS_ERROR(-EINVAL); - - if (!VN_CACHED(vp) && pos < i_size_read(inode)) - need_i_mutex = 0; - - if (VN_CACHED(vp)) - need_flush = 1; - } - relock: - if (need_i_mutex) { + if (ioflags & IO_ISDIRECT) { + iolock = XFS_IOLOCK_SHARED; + locktype = VRWLOCK_WRITE_DIRECT; + need_i_mutex = 0; + } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; - + need_i_mutex = 1; mutex_lock(&inode->i_mutex); - } else { - iolock = XFS_IOLOCK_SHARED; - locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); - isize = i_size_read(inode); - - if (file->f_flags & O_APPEND) - *offset = isize; - start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -726,13 +708,8 @@ start: goto out_unlock_mutex; } - new_size = pos + count; - if (new_size > isize) - io->io_new_size = new_size; - if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { - loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) @@ -743,8 +720,7 @@ start: pos, count, dmflags, &locktype); if (error) { - xfs_iunlock(xip, iolock); - goto out_unlock_mutex; + goto out_unlock_internal; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; @@ -756,12 +732,35 @@ start: * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ - if ((file->f_flags & O_APPEND) && savedsize != isize) { - pos = isize = xip->i_d.di_size; + if ((file->f_flags & O_APPEND) && pos != xip->i_size) + goto start; + } + + if (ioflags & IO_ISDIRECT) { + xfs_buftarg_t *target = + (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + + if ((pos & target->bt_smask) || (count & target->bt_smask)) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return XFS_ERROR(-EINVAL); + } + + if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + iolock = XFS_IOLOCK_EXCL; + locktype = VRWLOCK_WRITE; + need_i_mutex = 1; + mutex_lock(&inode->i_mutex); + xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); goto start; } } + new_size = pos + count; + if (new_size > xip->i_size) + io->io_new_size = new_size; + if (likely(!(ioflags & IO_INVIS))) { file_update_time(file); xfs_ichgtime_fast(xip, inode, @@ -777,11 +776,11 @@ start: * to zero it out up to the new size. */ - if (pos > isize) { - error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize); + if (pos > xip->i_size) { + error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, xip->i_size); if (error) { - xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); - goto out_unlock_mutex; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + goto out_unlock_internal; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); @@ -801,8 +800,7 @@ start: if (likely(!error)) error = -remove_suid(file->f_path.dentry); if (unlikely(error)) { - xfs_iunlock(xip, iolock); - goto out_unlock_mutex; + goto out_unlock_internal; } } @@ -811,11 +809,14 @@ retry: current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { - if (need_flush) { + if (VN_CACHED(vp)) { + WARN_ON(need_i_mutex == 0); xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); - bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), + error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); + if (error) + goto out_unlock_internal; } if (need_i_mutex) { @@ -843,7 +844,6 @@ retry: pos += ret; count -= ret; - need_i_mutex = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; @@ -870,12 +870,12 @@ retry: error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay 
flag intentionally unused */ - if (error) - goto out_nounlocks; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_rwlock(bdp, locktype); - pos = xip->i_d.di_size; + if (error) + goto out_unlock_internal; + pos = xip->i_size; ret = 0; goto retry; } @@ -884,14 +884,10 @@ retry: if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) *offset = isize; - if (*offset > xip->i_d.di_size) { + if (*offset > xip->i_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); - if (*offset > xip->i_d.di_size) { - xip->i_d.di_size = *offset; - i_size_write(inode, *offset); - xip->i_update_core = 1; - xip->i_update_size = 1; - } + if (*offset > xip->i_size) + xip->i_size = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); } @@ -913,16 +909,31 @@ retry: error = sync_page_range(inode, mapping, pos, ret); if (!error) - error = ret; - return error; + error = -ret; + if (need_i_mutex) + mutex_lock(&inode->i_mutex); + xfs_rwlock(bdp, locktype); } out_unlock_internal: + if (io->io_new_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + io->io_new_size = 0; + /* + * If this was a direct or synchronous I/O that failed (such + * as ENOSPC) then part of the I/O may have been written to + * disk before the error occurred. In this case the on-disk + * file size may have been adjusted beyond the in-memory file + * size and now needs to be truncated back. 
+ */ + if (xip->i_d.di_size > xip->i_size) + xip->i_d.di_size = xip->i_size; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } xfs_rwunlock(bdp, locktype); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); - out_nounlocks: return -error; } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 14e2cbe5a8d..bf9a9d5909b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -360,8 +360,7 @@ xfs_fs_inode_init_once( kmem_zone_t *zonep, unsigned long flags) { - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); + inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index b76118cf489..d1b2d01843d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -194,7 +194,7 @@ typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, typedef void (*vop_link_removed_t)(bhv_desc_t *, bhv_vnode_t *, int); typedef void (*vop_vnode_change_t)(bhv_desc_t *, bhv_vchange_t, __psint_t); typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); -typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef int (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); typedef int (*vop_iflush_t)(bhv_desc_t *, int); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 4adaf13aac6..cfdd35ee9f7 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -753,8 +753,7 @@ xfs_qm_idtodq( goto error0; } if (tp) { - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, - NULL))) + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) goto error1; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 1de2acdc7f7..3e4a8ad8a34 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -388,6 +388,17 @@ 
xfs_qm_mount_quotas( return XFS_ERROR(error); } } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) { + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + } + if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) { + mp->m_qflags &= ~XFS_OQUOTA_CHKD; + } write_changes: /* @@ -1453,8 +1464,7 @@ xfs_qm_qino_alloc( XFS_SB_UNLOCK(mp, s); xfs_mod_sb(tp, sbfields); - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, - NULL))) { + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); return error; } @@ -2405,7 +2415,7 @@ xfs_qm_write_sb_changes( } xfs_mod_sb(tp, flags); - (void) xfs_trans_commit(tp, 0, NULL); + (void) xfs_trans_commit(tp, 0); return 0; } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 716f562aa8b..2df67fd913e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -456,9 +456,7 @@ xfs_qm_scall_quotaon( || ((flags & XFS_PQUOTA_ACCT) == 0 && (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && - (flags & XFS_OQUOTA_ENFD)) - || - ((flags & XFS_GQUOTA_ACCT) == 0 && + (flags & XFS_GQUOTA_ACCT) == 0 && (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && (flags & XFS_OQUOTA_ENFD))) { qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", @@ -735,7 +733,7 @@ xfs_qm_scall_setqlim( xfs_trans_log_dquot(tp, dqp); xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); @@ -809,7 +807,7 @@ xfs_qm_log_quotaoff_end( * We don't care about quotoff's performance. */ xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); return (error); } @@ -852,7 +850,7 @@ xfs_qm_log_quotaoff( * We don't care about quotoff's performance. 
*/ xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); error0: if (error) { @@ -911,14 +909,19 @@ xfs_qm_export_dquot( * gets turned off. No need to confuse the user level code, * so return zeroes in that case. */ - if (! XFS_IS_QUOTA_ENFORCED(mp)) { + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || + (!XFS_IS_OQUOTA_ENFORCED(mp) && + (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { dst->d_btimer = 0; dst->d_itimer = 0; dst->d_rtbtimer = 0; } #ifdef DEBUG - if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) { + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || + (XFS_IS_OQUOTA_ENFORCED(mp) && + (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && + dst->d_id != 0) { if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { ASSERT(dst->d_btimer != 0); diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index d7491e7b1f3..7de6874bf1b 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -656,7 +656,9 @@ xfs_trans_dqresv( if ((flags & XFS_QMOPT_FORCE_RES) == 0 && dqp->q_core.d_id && - XFS_IS_QUOTA_ENFORCED(dqp->q_mount)) { + ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || + (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && + (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { #ifdef QUOTADEBUG cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 08bbd3cb87a..f45a49ffd3a 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -81,20 +81,3 @@ assfail(char *expr, char *file, int line) printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); BUG(); } - -#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM)) -unsigned long random(void) -{ - static unsigned long RandomValue = 1; - /* cycles pseudo-randomly through all values 
between 1 and 2^31 - 2 */ - register long rv = RandomValue; - register long lo; - register long hi; - - hi = rv / 127773; - lo = rv % 127773; - rv = 16807 * lo - 2836 * hi; - if (rv <= 0) rv += 2147483647; - return RandomValue = rv; -} -#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */ diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 2a70cc605ae..a27a7c8c052 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -50,7 +50,7 @@ extern void assfail(char *expr, char *f, int l); #else /* DEBUG */ # define ASSERT(expr) ASSERT_ALWAYS(expr) -extern unsigned long random(void); +# include <linux/random.h> #ifndef STATIC # define STATIC noinline diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index e80dda3437d..8e9a40aa0cd 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -764,7 +764,7 @@ xfs_alloc_ag_vextent_near( */ int dofirst; /* set to do first algorithm */ - dofirst = random() & 1; + dofirst = random32() & 1; #endif /* * Get a cursor for the by-size btree. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 9d358ffce4e..7ce44a7b88a 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -328,8 +328,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, xfs_trans_set_sync(args.trans); } err2 = xfs_trans_commit(args.trans, - XFS_TRANS_RELEASE_LOG_RES, - NULL); + XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); /* @@ -397,8 +396,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES, - NULL); + error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); /* @@ -544,8 +542,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) * Commit the last in the sequence of transactions. 
*/ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES, - NULL); + error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); /* @@ -859,8 +856,7 @@ xfs_attr_inactive(xfs_inode_t *dp) * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES, - NULL); + error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); return(error); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 8eab73e8340..81f45dae1c5 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -3053,7 +3053,7 @@ xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp) * is in progress. The caller takes the responsibility to cancel * the duplicate transaction that gets returned. */ - if ((error = xfs_trans_commit(trans, 0, NULL))) + if ((error = xfs_trans_commit(trans, 0))) return (error); trans = *transp; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 87795188ced..b1ea26e40aa 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -130,7 +130,6 @@ STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp,/* inode logging flags */ xfs_extdelta_t *delta, /* Change made to incore extents */ @@ -399,7 +398,6 @@ xfs_bmap_count_leaves( STATIC int xfs_bmap_disk_count_leaves( - xfs_ifork_t *ifp, xfs_extnum_t idx, xfs_bmbt_block_t *block, int numrecs, @@ -580,7 +578,7 @@ xfs_bmap_add_extent( if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new, + if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, &logflags, delta, rsvd))) goto 
done; } @@ -1841,7 +1839,6 @@ STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ xfs_extdelta_t *delta, /* Change made to incore extents */ @@ -4071,7 +4068,7 @@ xfs_bmap_add_attrfork( } if ((error = xfs_bmap_finish(&tp, &flist, &committed))) goto error2; - error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); return error; @@ -4227,7 +4224,7 @@ xfs_bmap_finish( logres = ntp->t_log_res; logcount = ntp->t_log_count; ntp = xfs_trans_dup(*tp); - error = xfs_trans_commit(*tp, 0, NULL); + error = xfs_trans_commit(*tp, 0); *tp = ntp; *committed = 1; /* @@ -4447,8 +4444,11 @@ xfs_bmap_one_block( xfs_bmbt_irec_t s; /* internal version of extent */ #ifndef DEBUG - if (whichfork == XFS_DATA_FORK) - return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize; + if (whichfork == XFS_DATA_FORK) { + return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ? 
+ (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : + (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); + } #endif /* !DEBUG */ if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) return 0; @@ -4460,7 +4460,7 @@ xfs_bmap_one_block( xfs_bmbt_get_all(ep, &s); rval = s.br_startoff == 0 && s.br_blockcount == 1; if (rval && whichfork == XFS_DATA_FORK) - ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); + ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize); return rval; } @@ -5820,7 +5820,7 @@ xfs_getbmap( fixlen = XFS_MAXIOFFSET(mp); } else { prealloced = 0; - fixlen = ip->i_d.di_size; + fixlen = ip->i_size; } } else { prealloced = 0; @@ -5844,7 +5844,8 @@ xfs_getbmap( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) { + if (whichfork == XFS_DATA_FORK && + (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF); } @@ -6425,8 +6426,8 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - if (unlikely(xfs_bmap_disk_count_leaves(ifp, - 0, block, numrecs, count) < 0)) { + if (unlikely(xfs_bmap_disk_count_leaves(0, + block, numrecs, count) < 0)) { xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", XFS_ERRLEVEL_LOW, mp); @@ -6472,7 +6473,6 @@ xfs_bmap_count_leaves( */ int xfs_bmap_disk_count_leaves( - xfs_ifork_t *ifp, xfs_extnum_t idx, xfs_bmbt_block_t *block, int numrecs, diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index b847e6a7a3f..de35d18cc00 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -199,7 +199,9 @@ xfs_swap_extents( if (VN_CACHED(tvp) != 0) { xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); - bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED); + error = bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED); + if (error) + goto error0; } /* Verify O_DIRECT for ftmp */ @@ -382,7 +384,7 @@ 
xfs_swap_extents( xfs_trans_set_sync(tp); } - error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); locked = 0; error0: diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 9d7438bba30..3accc1dcd6c 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -282,8 +282,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, - &needlog, NULL); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); needscan = 0; } } @@ -333,7 +332,7 @@ xfs_dir2_block_addname( */ if (needscan) { xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, - &needlog, NULL); + &needlog); needscan = 0; } /* @@ -418,8 +417,7 @@ xfs_dir2_block_addname( * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, - NULL); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); @@ -798,8 +796,7 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, - NULL); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_check(dp, bp); @@ -996,8 +993,7 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. 
*/ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, - NULL); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index f7c79921707..c211c37ef67 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -324,8 +324,7 @@ void xfs_dir2_data_freescan( xfs_mount_t *mp, /* filesystem mount point */ xfs_dir2_data_t *d, /* data block pointer */ - int *loghead, /* out: log data header */ - char *aendp) /* in: caller's endp */ + int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ @@ -346,9 +345,7 @@ xfs_dir2_data_freescan( * Set up pointers. */ p = (char *)d->u; - if (aendp) - endp = aendp; - else if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp); } else diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index a6ae2d21c40..c94c9099cfb 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -166,7 +166,7 @@ extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d, extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, xfs_dir2_data_unused_t *dup, int *loghead); extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, - int *loghead, char *aendp); + int *loghead); extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, struct xfs_dabuf **bpp); extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index b1cf1fbf423..db14ea71459 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -133,8 +133,7 @@ xfs_dir2_block_to_leaf( */ block->hdr.magic = 
cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, - NULL); + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); /* * Set up leaf tail and bests table. */ @@ -414,7 +413,7 @@ xfs_dir2_leaf_addname( * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog, NULL); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Need to log the data block's header. */ @@ -1496,7 +1495,7 @@ xfs_dir2_leaf_removename( * log the data block header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog, NULL); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 9ca71719b68..d083c381993 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -904,7 +904,7 @@ xfs_dir2_leafn_remove( * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog, NULL); + xfs_dir2_data_freescan(mp, data, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); xfs_dir2_data_check(dp, dbp); @@ -1705,7 +1705,7 @@ xfs_dir2_node_addname_int( * Rescan the block for bestfree if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog, NULL); + xfs_dir2_data_freescan(mp, data, &needlog); /* * Log the data block header if needed. 
*/ diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index b1af54464f0..8c433163133 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -80,7 +80,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression, int i; int64_t fsid; - if (random() % randfactor) + if (random32() % randfactor) return 0; memcpy(&fsid, fsidp, sizeof(xfs_fsid_t)); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 32c37c1c47a..b599e6be9ec 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -346,7 +346,7 @@ xfs_growfs_data_private( xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); if (error) { return error; } @@ -605,7 +605,7 @@ xfs_fs_log_dummy( xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); } diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index c1c89dac19c..114433a22ba 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -879,17 +879,17 @@ xfs_ilock(xfs_inode_t *ip, (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) { - mrupdate(&ip->i_iolock); + mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); } else if (lock_flags & XFS_IOLOCK_SHARED) { - mraccess(&ip->i_iolock); + mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); } if (lock_flags & XFS_ILOCK_EXCL) { - mrupdate(&ip->i_lock); + mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); } else if (lock_flags & XFS_ILOCK_SHARED) { - mraccess(&ip->i_lock); + mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); } xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); } 
@@ -923,7 +923,7 @@ xfs_ilock_nowait(xfs_inode_t *ip, (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); iolocked = 0; if (lock_flags & XFS_IOLOCK_EXCL) { @@ -983,7 +983,8 @@ xfs_iunlock(xfs_inode_t *ip, (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | + XFS_LOCK_DEP_MASK)) == 0); ASSERT(lock_flags != 0); if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3da9829c19d..3ca5d43b834 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -442,6 +442,7 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } ip->i_d.di_size = 0; + ip->i_size = 0; ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); break; @@ -980,6 +981,7 @@ xfs_iread( } ip->i_delayed_blks = 0; + ip->i_size = ip->i_d.di_size; /* * Mark the buffer containing the inode as something to keep @@ -1170,6 +1172,7 @@ xfs_ialloc( } ip->i_d.di_size = 0; + ip->i_size = 0; ip->i_d.di_nextents = 0; ASSERT(ip->i_d.di_nblocks == 0); xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); @@ -1340,7 +1343,7 @@ xfs_file_last_byte( } else { last_block = 0; } - size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); + size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); last_block = XFS_FILEOFF_MAX(last_block, size_last_block); last_byte = XFS_FSB_TO_B(mp, last_block); @@ -1421,7 +1424,7 @@ xfs_itrunc_trace( * must be called again with all the same restrictions as the initial * call. 
*/ -void +int xfs_itruncate_start( xfs_inode_t *ip, uint flags, @@ -1431,9 +1434,10 @@ xfs_itruncate_start( xfs_off_t toss_start; xfs_mount_t *mp; bhv_vnode_t *vp; + int error = 0; ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); - ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); ASSERT((flags == XFS_ITRUNC_DEFINITE) || (flags == XFS_ITRUNC_MAYBE)); @@ -1468,7 +1472,7 @@ xfs_itruncate_start( * file size, so there is no way that the data extended * out there. */ - return; + return 0; } last_byte = xfs_file_last_byte(ip); xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, @@ -1477,7 +1481,7 @@ xfs_itruncate_start( if (flags & XFS_ITRUNC_DEFINITE) { bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } else { - bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); + error = bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } } @@ -1486,6 +1490,7 @@ xfs_itruncate_start( ASSERT(VN_CACHED(vp) == 0); } #endif + return error; } /* @@ -1556,7 +1561,7 @@ xfs_itruncate_finish( ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); - ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); + ASSERT((new_size == 0) || (new_size <= ip->i_size)); ASSERT(*tp != NULL); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_transp == *tp); @@ -1630,8 +1635,20 @@ xfs_itruncate_finish( */ if (fork == XFS_DATA_FORK) { if (ip->i_d.di_nextents > 0) { - ip->i_d.di_size = new_size; - xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + /* + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). 
+ */ + if (ip->i_size != new_size) { + ip->i_d.di_size = new_size; + ip->i_size = new_size; + xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + } } } else if (sync) { ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); @@ -1746,7 +1763,7 @@ xfs_itruncate_finish( xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); } ntp = xfs_trans_dup(ntp); - (void) xfs_trans_commit(*tp, 0, NULL); + (void) xfs_trans_commit(*tp, 0); *tp = ntp; error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, @@ -1767,7 +1784,19 @@ xfs_itruncate_finish( */ if (fork == XFS_DATA_FORK) { xfs_isize_check(mp, ip, new_size); - ip->i_d.di_size = new_size; + /* + * If we are not changing the file size then do + * not update the on-disk file size - we may be + * called from xfs_inactive_free_eofblocks(). If we + * update the on-disk file size and then the system + * crashes before the contents of the file are + * flushed to disk then the files may be full of + * holes (ie NULL files bug). + */ + if (ip->i_size != new_size) { + ip->i_d.di_size = new_size; + ip->i_size = new_size; + } } xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); ASSERT((new_size != 0) || @@ -1800,7 +1829,7 @@ xfs_igrow_start( ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); - ASSERT(new_size > ip->i_d.di_size); + ASSERT(new_size > ip->i_size); /* * Zero any pages that may have been created by @@ -1808,7 +1837,7 @@ xfs_igrow_start( * and any blocks between the old and new file sizes. */ error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, - ip->i_d.di_size); + ip->i_size); return error; } @@ -1832,13 +1861,14 @@ xfs_igrow_finish( ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); ASSERT(ip->i_transp == tp); - ASSERT(new_size > ip->i_d.di_size); + ASSERT(new_size > ip->i_size); /* * Update the file size. Update the inode change timestamp * if change_flag set. 
*/ ip->i_d.di_size = new_size; + ip->i_size = new_size; if (change_flag) xfs_ichgtime(ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -2321,7 +2351,7 @@ xfs_ifree( ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_anextents == 0); - ASSERT((ip->i_d.di_size == 0) || + ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); ASSERT(ip->i_d.di_nblocks == 0); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bc823720d88..f75afecef8e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -287,6 +287,7 @@ typedef struct xfs_inode { struct xfs_inode *i_cnext; /* cluster hash link forward */ struct xfs_inode *i_cprev; /* cluster hash link backward */ + xfs_fsize_t i_size; /* in-memory size */ /* Trace buffers per inode. */ #ifdef XFS_BMAP_TRACE struct ktrace *i_xtrace; /* inode extent list trace */ @@ -305,6 +306,8 @@ typedef struct xfs_inode { #endif } xfs_inode_t; +#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ + (ip)->i_size : (ip)->i_d.di_size; /* * i_flags helper functions @@ -379,26 +382,58 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) /* * Flags for inode locking. 
+ * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) + * 1<<16 - 1<<32-1 -- lockdep annotation (integers) */ -#define XFS_IOLOCK_EXCL 0x001 -#define XFS_IOLOCK_SHARED 0x002 -#define XFS_ILOCK_EXCL 0x004 -#define XFS_ILOCK_SHARED 0x008 -#define XFS_IUNLOCK_NONOTIFY 0x010 -/* XFS_IOLOCK_NESTED 0x020 */ -#define XFS_EXTENT_TOKEN_RD 0x040 -#define XFS_SIZE_TOKEN_RD 0x080 +#define XFS_IOLOCK_EXCL (1<<0) +#define XFS_IOLOCK_SHARED (1<<1) +#define XFS_ILOCK_EXCL (1<<2) +#define XFS_ILOCK_SHARED (1<<3) +#define XFS_IUNLOCK_NONOTIFY (1<<4) +/* #define XFS_IOLOCK_NESTED (1<<5) */ +#define XFS_EXTENT_TOKEN_RD (1<<6) +#define XFS_SIZE_TOKEN_RD (1<<7) #define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD) -#define XFS_WILLLEND 0x100 /* Always acquire tokens for lending */ +#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */ #define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND) #define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND) #define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND) -/* XFS_SIZE_TOKEN_WANT 0x200 */ +/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */ -#define XFS_LOCK_MASK \ - (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \ - XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \ - XFS_WILLLEND) +#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ + | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ + | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \ + | XFS_WILLLEND) + +/* + * Flags for lockdep annotations. + * + * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes + * (ie directory operations that require locking a directory inode and + * an entry inode). The first inode gets locked with this flag so it + * gets a lockdep subclass of 1 and the second lock will have a lockdep + * subclass of 0. + * + * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the same time + * with xfs_lock_inodes(). 
This flag is used as the starting subclass + * and each subsequent lock acquired will increment the subclass by one. + * So the first lock acquired will have a lockdep subclass of 2, the + * second lock will have a lockdep subclass of 3, and so on. + */ +#define XFS_IOLOCK_SHIFT 16 +#define XFS_IOLOCK_PARENT (1 << XFS_IOLOCK_SHIFT) +#define XFS_IOLOCK_INUMORDER (2 << XFS_IOLOCK_SHIFT) + +#define XFS_ILOCK_SHIFT 24 +#define XFS_ILOCK_PARENT (1 << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_INUMORDER (2 << XFS_ILOCK_SHIFT) + +#define XFS_IOLOCK_DEP_MASK 0x00ff0000 +#define XFS_ILOCK_DEP_MASK 0xff000000 +#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) + +#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) +#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) /* * Flags for xfs_iflush() @@ -481,7 +516,7 @@ uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode_core *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); -void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); +int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c index 06d710c9ce4..81548ec72ba 100644 --- a/fs/xfs/xfs_iocore.c +++ b/fs/xfs/xfs_iocore.c @@ -52,7 +52,7 @@ STATIC xfs_fsize_t xfs_size_fn( xfs_inode_t *ip) { - return (ip->i_d.di_size); + return XFS_ISIZE(ip); } STATIC int diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index cc6a7b5a991..3f2b9f2a7b9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -458,7 +458,7 @@ xfs_iomap_write_direct( extsz = ip->i_d.di_extsize; } - isize = ip->i_d.di_size; + isize = ip->i_size; if (io->io_new_size > isize) isize = io->io_new_size; @@ -524,7 +524,7 @@ xfs_iomap_write_direct( xfs_trans_ihold(tp, ip); bmapi_flag = 
XFS_BMAPI_WRITE; - if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz)) + if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* @@ -543,7 +543,7 @@ xfs_iomap_write_direct( error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto error0; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error_out; @@ -676,7 +676,7 @@ xfs_iomap_write_delay( offset_fsb = XFS_B_TO_FSBT(mp, offset); retry: - isize = ip->i_d.di_size; + isize = ip->i_size; if (io->io_new_size > isize) isize = io->io_new_size; @@ -817,7 +817,7 @@ xfs_iomap_write_allocate( * we dropped the ilock in the interim. */ - end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); + end_fsb = XFS_B_TO_FSB(mp, ip->i_size); xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); last_block = XFS_FILEOFF_MAX(last_block, end_fsb); @@ -840,8 +840,7 @@ xfs_iomap_write_allocate( if (error) goto trans_cancel; - error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error0; @@ -948,7 +947,7 @@ xfs_iomap_write_unwritten( if (error) goto error_on_bmapi_transaction; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return XFS_ERROR(error); diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 3ce204a524b..df441ee936b 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -22,6 +22,7 @@ typedef enum { /* iomap_flags values */ + IOMAP_READ = 0, /* mapping for a read */ IOMAP_EOF = 0x01, /* mapping contains EOF */ IOMAP_HOLE = 0x02, /* mapping covers a hole */ IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7775ddc0b3c..e725ddd3de5 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -809,7 
+809,7 @@ xfs_inumbers( xfs_buf_relse(agbp); agbp = NULL; /* - * Move up the the last inode in the current + * Move up the last inode in the current * chunk. The lookup_ge will always get * us the first inode in the next chunk. */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ca74d3f5910..080fabf61c9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1509,7 +1509,6 @@ xlog_recover_insert_item_frontq( STATIC int xlog_recover_reorder_trans( - xlog_t *log, xlog_recover_t *trans) { xlog_recover_item_t *first_item, *itemq, *itemq_next; @@ -1867,7 +1866,6 @@ xlog_recover_do_inode_buffer( /*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( - xfs_mount_t *mp, xlog_recover_item_t *item, xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) @@ -2083,7 +2081,7 @@ xlog_recover_do_dquot_buffer( if (log->l_quotaoffs_flag & type) return; - xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + xlog_recover_do_reg_buffer(item, bp, buf_f); } /* @@ -2184,7 +2182,7 @@ xlog_recover_do_buffer_trans( (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { - xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + xlog_recover_do_reg_buffer(item, bp, buf_f); } if (error) return XFS_ERROR(error); @@ -2765,7 +2763,7 @@ xlog_recover_do_trans( int error = 0; xlog_recover_item_t *item, *first_item; - if ((error = xlog_recover_reorder_trans(log, trans))) + if ((error = xlog_recover_reorder_trans(trans))) return error; first_item = item = trans->r_itemq; do { @@ -3016,7 +3014,7 @@ xlog_recover_process_efi( } efip->efi_flags |= XFS_EFI_RECOVERED; - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); } /* @@ -3143,7 +3141,7 @@ xlog_recover_clear_agi_bucket( xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); - (void) xfs_trans_commit(tp, 0, NULL); + (void) xfs_trans_commit(tp, 0); } /* @@ -3886,8 +3884,7 @@ xlog_recover( * under the vfs layer, so we can get away with it 
unless * the device itself is read-only, in which case we fail. */ - if ((error = xfs_dev_is_read_only(log->l_mp, - "recovery required"))) { + if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3bed0cf0d8a..a96bde6df96 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1653,7 +1653,7 @@ xfs_mount_log_sbunit( return; } xfs_mod_sb(tp, fields); - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); } @@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify( per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* Easy Case - initialize the area and locks, and * then rebalance when online does everything else for us. */ memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: xfs_icsb_lock(mp); xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); @@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify( xfs_icsb_unlock(mp); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* Disable all the counters, then fold the dead cpu's * count into the total on the global superblock and * re-enable the counters. 
*/ diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 320d63ff9ca..0d594ed7efe 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -78,7 +78,7 @@ xfs_mount_reset_sbqflags(xfs_mount_t *mp) return error; } xfs_mod_sb(tp, XFS_SB_QFLAGS); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); return error; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 9dcb32aa4e2..6f14df976f7 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -154,10 +154,11 @@ typedef struct xfs_qoff_logformat { #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) +#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) +#define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) /* * Incore only flags for quotaoff - these bits get cleared when quota(s) diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 4c6573d784c..7679d7a7022 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -584,7 +584,7 @@ xfs_rename( * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. 
*/ - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (target_ip != NULL) { xfs_refcache_purge_ip(target_ip); IRELE(target_ip); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6fff19dc3cf..b3a5f07bd07 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -150,7 +150,7 @@ xfs_growfs_rt_alloc( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) goto error_exit; - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); /* * Now we need to clear the allocated blocks. * Do this one block per transaction, to keep it simple. @@ -187,7 +187,7 @@ xfs_growfs_rt_alloc( /* * Commit the transaction. */ - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); } /* * Go on to the next extent, if any. @@ -2042,7 +2042,7 @@ xfs_growfs_rt( /* * Commit the transaction. */ - xfs_trans_commit(tp, 0, NULL); + xfs_trans_commit(tp, 0); } if (error) diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 1ea7c0ca6ae..905d1c008be 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -83,7 +83,7 @@ xfs_write_clear_setuid( } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; } @@ -164,7 +164,7 @@ xfs_write_sync_logforce( xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); } } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 301ff9445b6..cc2d60951e2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -753,7 +753,6 @@ int _xfs_trans_commit( xfs_trans_t *tp, uint flags, - xfs_lsn_t *commit_lsn_p, int *log_flushed) { xfs_log_iovec_t *log_vector; @@ -812,8 +811,6 @@ shut_us_down: xfs_trans_free_busy(tp); xfs_trans_free(tp); 
XFS_STATS_INC(xs_trans_empty); - if (commit_lsn_p) - *commit_lsn_p = commit_lsn; return (shutdown); } ASSERT(tp->t_ticket != NULL); @@ -864,9 +861,6 @@ shut_us_down: kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); } - if (commit_lsn_p) - *commit_lsn_p = commit_lsn; - /* * If we got a log write error. Unpin the logitems that we * had pinned, clean up, free trans structure, and return error. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f1d7ab23672..7dfcc450366 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -988,10 +988,8 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, xfs_extlen_t); int _xfs_trans_commit(xfs_trans_t *, uint flags, - xfs_lsn_t *, int *); -#define xfs_trans_commit(tp, flags, lsn) \ - _xfs_trans_commit(tp, flags, lsn, NULL) +#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); void xfs_trans_ail_init(struct xfs_mount *); xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 9014d7e4448..20ffec308e1 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -222,7 +222,7 @@ xfs_dir_ialloc( } ntp = xfs_trans_dup(tp); - code = xfs_trans_commit(tp, 0, NULL); + code = xfs_trans_commit(tp, 0); tp = ntp; if (committed != NULL) { *committed = 1; @@ -420,7 +420,11 @@ xfs_truncate_file( * in a transaction. 
*/ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, @@ -460,8 +464,7 @@ xfs_truncate_file( XFS_TRANS_ABORT); } else { xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, - NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 29f72f61378..65c561201cb 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -696,7 +696,7 @@ xfs_unmount_flush( bhv_vnode_t *rvp = XFS_ITOV(rip); int error; - xfs_ilock(rip, XFS_ILOCK_EXCL); + xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); xfs_iflock(rip); /* @@ -1147,7 +1147,7 @@ xfs_sync_inodes( if (XFS_FORCED_SHUTDOWN(mp)) { bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); } else { - bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF); + error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -1539,7 +1539,7 @@ xfs_syncsub( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_log_force(mp, (xfs_lsn_t)0, log_flags); } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 52c41714ec5..de17aed578f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -133,7 +133,7 @@ xfs_getattr( if (!(flags & ATTR_LAZY)) xfs_ilock(ip, XFS_ILOCK_SHARED); - vap->va_size = ip->i_d.di_size; + vap->va_size = XFS_ISIZE(ip); if (vap->va_mask == XFS_AT_SIZE) goto all_done; @@ -496,7 +496,7 @@ xfs_setattr( if (mask & XFS_AT_SIZE) { /* Short circuit the 
truncate case for zero length files */ if ((vap->va_size == 0) && - (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { + (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; if (mask & XFS_AT_CTIME) @@ -614,7 +614,7 @@ xfs_setattr( */ if (mask & XFS_AT_SIZE) { code = 0; - if ((vap->va_size > ip->i_d.di_size) && + if ((vap->va_size > ip->i_size) && (flags & ATTR_NOSIZETOK) == 0) { code = xfs_igrow_start(ip, vap->va_size, credp); } @@ -654,10 +654,10 @@ xfs_setattr( * Truncate file. Must have write permission and not be a directory. */ if (mask & XFS_AT_SIZE) { - if (vap->va_size > ip->i_d.di_size) { + if (vap->va_size > ip->i_size) { xfs_igrow_finish(tp, ip, vap->va_size, !(flags & ATTR_DMI)); - } else if ((vap->va_size <= ip->i_d.di_size) || + } else if ((vap->va_size <= ip->i_size) || ((vap->va_size == 0) && ip->i_d.di_nextents)) { /* * signal a sync transaction unless @@ -873,7 +873,7 @@ xfs_setattr( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, commit_flags, NULL); + code = xfs_trans_commit(tp, commit_flags); } /* @@ -1176,7 +1176,7 @@ xfs_fsync( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (flag & FSYNC_WAIT) xfs_trans_set_sync(tp); - error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); + error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -1221,7 +1221,7 @@ xfs_inactive_free_eofblocks( * Figure out if there are any blocks beyond the end * of the file. If not, then there is nothing to do. */ - end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); + end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) @@ -1257,8 +1257,12 @@ xfs_inactive_free_eofblocks( * do that within a transaction. 
*/ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, - ip->i_d.di_size); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, + ip->i_size); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1278,7 +1282,7 @@ xfs_inactive_free_eofblocks( xfs_trans_ihold(tp, ip); error = xfs_itruncate_finish(&tp, ip, - ip->i_d.di_size, + ip->i_size, XFS_DATA_FORK, 0); /* @@ -1291,8 +1295,7 @@ xfs_inactive_free_eofblocks( XFS_TRANS_ABORT)); } else { error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES, - NULL); + XFS_TRANS_RELEASE_LOG_RES); } xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); } @@ -1406,7 +1409,7 @@ xfs_inactive_symlink_rmt( * we need to unlock the inode since the new transaction doesn't * have the inode attached. */ - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); tp = ntp; if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); @@ -1503,7 +1506,7 @@ xfs_inactive_attrs( tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_d.di_forkoff != 0); - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_attr_inactive(ip); @@ -1565,7 +1568,7 @@ xfs_release( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & @@ -1626,8 +1629,8 @@ xfs_inactive( * only one with a reference to the inode. 
*/ truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || - (ip->i_delayed_blks > 0)) && + ((ip->i_d.di_size != 0) || (ip->i_size != 0) || + (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); mp = ip->i_mount; @@ -1645,7 +1648,7 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS) && (!(ip->i_d.di_flags & @@ -1675,7 +1678,11 @@ xfs_inactive( */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1790,7 +1797,7 @@ xfs_inactive( * nothing we can do except to try to keep going. */ (void) xfs_bmap_finish(&tp, &free_list, &committed); - (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } /* * Release the dquots held by inode, if any. 
@@ -1940,7 +1947,7 @@ xfs_create( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); XFS_BMAP_INIT(&free_list, &first_block); @@ -2026,7 +2033,7 @@ xfs_create( goto abort_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); tp = NULL; @@ -2121,7 +2128,6 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - bhv_vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2135,7 +2141,7 @@ xfs_lock_dir_and_entry( attempts = 0; again: - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); e_inum = ip->i_ino; @@ -2204,6 +2210,21 @@ int xfs_lock_delays; #endif /* + * Bump the subclass so xfs_lock_inodes() acquires each lock with + * a different value + */ +static inline int +xfs_lock_inumorder(int lock_mode, int subclass) +{ + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) + lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT; + + return lock_mode; +} + +/* * The following routine will lock n inodes in exclusive mode. * We assume the caller calls us with the inodes in i_ino order. * @@ -2270,7 +2291,7 @@ again: * that is in the AIL. 
*/ ASSERT(i != 0); - if (!xfs_ilock_nowait(ips[i], lock_mode)) { + if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { attempts++; /* @@ -2305,7 +2326,7 @@ again: goto again; } } else { - xfs_ilock(ips[i], lock_mode); + xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); } } @@ -2440,7 +2461,7 @@ xfs_remove( return error; } - error = xfs_lock_dir_and_entry(dp, dentry, ip); + error = xfs_lock_dir_and_entry(dp, ip); if (error) { REMOVE_DEBUG_TRACE(__LINE__); xfs_trans_cancel(tp, cancel_flags); @@ -2511,7 +2532,7 @@ xfs_remove( goto error_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); goto std_return; @@ -2719,7 +2740,7 @@ xfs_link( goto abort_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto std_return; @@ -2839,7 +2860,7 @@ xfs_mkdir( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); /* * Check for directory link count overflow. @@ -2936,7 +2957,7 @@ xfs_mkdir( goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); if (error) { @@ -3096,7 +3117,7 @@ xfs_rmdir( * that the directory entry for the child directory inode has * not changed while we were obtaining a log reservation. 
*/ - error = xfs_lock_dir_and_entry(dp, dentry, cdp); + error = xfs_lock_dir_and_entry(dp, cdp); if (error) { xfs_trans_cancel(tp, cancel_flags); IRELE(cdp); @@ -3190,7 +3211,7 @@ xfs_rmdir( goto std_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(cdp); goto std_return; @@ -3393,7 +3414,7 @@ xfs_symlink( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); /* * Check whether the directory allows new symlinks or not. @@ -3535,7 +3556,7 @@ xfs_symlink( if (error) { goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); @@ -3790,7 +3811,7 @@ xfs_set_dmattrs ( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); IHOLD(ip); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); return error; } @@ -4049,14 +4070,14 @@ xfs_alloc_file_space( allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* Generate a DMAPI event if needed. 
*/ - if (alloc_type != 0 && offset < ip->i_d.di_size && + if (alloc_type != 0 && offset < ip->i_size && (attr_flags&ATTR_DMI) == 0 && DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { xfs_off_t end_dmi_offset; end_dmi_offset = offset+len; - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), offset, end_dmi_offset - offset, 0, NULL); @@ -4148,7 +4169,7 @@ retry: goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; @@ -4283,7 +4304,6 @@ xfs_free_file_space( int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; - xfs_off_t ilen; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_extlen_t mod=0; @@ -4312,11 +4332,11 @@ xfs_free_file_space( end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); - if (offset < ip->i_d.di_size && + if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); @@ -4332,16 +4352,15 @@ xfs_free_file_space( } rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); - ilen = len + (offset & (rounding - 1)); ioffset = offset & ~(rounding - 1); - if (ilen & (rounding - 1)) - ilen = (ilen + rounding) & ~(rounding - 1); if (VN_CACHED(vp) != 0) { xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, ctooff(offtoct(ioffset)), -1); - bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), + error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), -1, FI_REMAPF_LOCKED); + if (error) + goto out_unlock_iolock; } /* @@ -4455,7 +4474,7 @@ 
xfs_free_file_space( goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -4533,7 +4552,7 @@ xfs_change_file_space( bf->l_start += offset; break; case 2: /*SEEK_END*/ - bf->l_start += ip->i_d.di_size; + bf->l_start += ip->i_size; break; default: return XFS_ERROR(EINVAL); @@ -4550,7 +4569,7 @@ xfs_change_file_space( bf->l_whence = 0; startoffset = bf->l_start; - fsize = ip->i_d.di_size; + fsize = ip->i_size; /* * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve @@ -4649,7 +4668,7 @@ xfs_change_file_space( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); |