diff options
Diffstat (limited to 'fs')
133 files changed, 1958 insertions, 1639 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 455aa207e67..d4bf8caad8d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -109,6 +109,7 @@ source "fs/sysfs/Kconfig" config TMPFS bool "Virtual memory file system support (former shm fs)" + depends on SHMEM help Tmpfs is a file system which keeps all files in virtual memory. diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 3ff8bdd18fb..0931bc1325e 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -21,7 +21,7 @@ static void afs_fl_release_private(struct file_lock *fl); static struct workqueue_struct *afs_lock_manager; static DEFINE_MUTEX(afs_lock_manager_mutex); -static struct file_lock_operations afs_lock_ops = { +static const struct file_lock_operations afs_lock_ops = { .fl_copy_lock = afs_fl_copy_lock, .fl_release_private = afs_fl_release_private, }; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 8630615e57f..852739d262a 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -28,7 +28,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v); static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos); -static struct seq_operations afs_proc_cells_ops = { +static const struct seq_operations afs_proc_cells_ops = { .start = afs_proc_cells_start, .next = afs_proc_cells_next, .stop = afs_proc_cells_stop, @@ -70,7 +70,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); -static struct seq_operations afs_proc_cell_volumes_ops = { +static const struct seq_operations afs_proc_cell_volumes_ops = { .start = afs_proc_cell_volumes_start, .next = afs_proc_cell_volumes_next, .stop = afs_proc_cell_volumes_stop, @@ -95,7 +95,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); -static struct seq_operations afs_proc_cell_vlservers_ops = { +static const struct seq_operations afs_proc_cell_vlservers_ops = { .start = afs_proc_cell_vlservers_start, .next = afs_proc_cell_vlservers_next, .stop = afs_proc_cell_vlservers_stop, @@ -119,7 +119,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); static int afs_proc_cell_servers_show(struct seq_file *m, void *v); -static struct seq_operations afs_proc_cell_servers_ops = { +static const struct seq_operations afs_proc_cell_servers_ops = { .start = afs_proc_cell_servers_start, .next = afs_proc_cell_servers_next, .stop = afs_proc_cell_servers_stop, @@ -24,6 +24,7 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> +#include <linux/mmu_context.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/aio.h> @@ -34,7 +35,6 @@ #include <asm/kmap_types.h> #include <asm/uaccess.h> -#include <asm/mmu_context.h> #if DEBUG > 1 #define dprintk printk @@ -78,6 +78,7 @@ static int __init aio_setup(void) return 0; } +__initcall(aio_setup); static void aio_free_ring(struct kioctx *ctx) { @@ -380,6 +381,7 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) __set_current_state(TASK_RUNNING); return iocb->ki_user_data; } +EXPORT_SYMBOL(wait_on_sync_kiocb); /* exit_aio: called when the last user of mm goes away. At this point, * there is no way for any new requests to be submited or any of the @@ -573,6 +575,7 @@ int aio_put_req(struct kiocb *req) spin_unlock_irq(&ctx->ctx_lock); return ret; } +EXPORT_SYMBOL(aio_put_req); static struct kioctx *lookup_ioctx(unsigned long ctx_id) { @@ -595,51 +598,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) } /* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * Called by the retry thread execute retries within the - * iocb issuer's mm context, so that copy_from/to_user - * operations work seamlessly for aio. - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -static void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - atomic_inc(&mm->mm_count); - tsk->mm = mm; - tsk->active_mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); - - mmdrop(active_mm); -} - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -static void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} - -/* * Queue up a kiocb to be retried. Assumes that the kiocb * has already been marked as kicked, and places it on * the retry run list for the corresponding ioctx, if it @@ -1037,6 +995,7 @@ put_rq: spin_unlock_irqrestore(&ctx->ctx_lock, flags); return ret; } +EXPORT_SYMBOL(aio_complete); /* aio_read_evt * Pull an event off of the ioctx's event ring. Returns the number of @@ -1825,9 +1784,3 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); return ret; } - -__initcall(aio_setup); - -EXPORT_SYMBOL(aio_complete); -EXPORT_SYMBOL(aio_put_req); -EXPORT_SYMBOL(wait_on_sync_kiocb); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 47d4a01c539..d11c51fc2a3 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -77,28 +77,24 @@ static const struct address_space_operations anon_aops = { * * Creates a new file by hooking it on a single inode. This is useful for files * that do not need to have a full-fledged inode in order to operate correctly. - * All the files created with anon_inode_getfd() will share a single inode, + * All the files created with anon_inode_getfile() will share a single inode, * hence saving memory and avoiding code duplication for the file/inode/dentry - * setup. Returns new descriptor or -error. + * setup. Returns the newly created file* or an error pointer. */ -int anon_inode_getfd(const char *name, const struct file_operations *fops, - void *priv, int flags) +struct file *anon_inode_getfile(const char *name, + const struct file_operations *fops, + void *priv, int flags) { struct qstr this; struct dentry *dentry; struct file *file; - int error, fd; + int error; if (IS_ERR(anon_inode_inode)) - return -ENODEV; + return ERR_PTR(-ENODEV); if (fops->owner && !try_module_get(fops->owner)) - return -ENOENT; - - error = get_unused_fd_flags(flags); - if (error < 0) - goto err_module; - fd = error; + return ERR_PTR(-ENOENT); /* * Link the inode to a directory entry by creating a unique name @@ -110,7 +106,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, this.hash = 0; dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); if (!dentry) - goto err_put_unused_fd; + goto err_module; /* * We know the anon_inode inode count is always greater than zero, @@ -136,16 +132,54 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, file->f_version = 0; file->private_data = priv; + return file; + +err_dput: + dput(dentry); +err_module: + module_put(fops->owner); + return ERR_PTR(error); +} +EXPORT_SYMBOL_GPL(anon_inode_getfile); + +/** + * anon_inode_getfd - creates a new file instance by hooking it up to an + * anonymous inode, and a dentry that describe the "class" + * of the file + * + * @name: [in] name of the "class" of the new file + * @fops: [in] file operations for the new file + * @priv: [in] private data for the new file (will be file's private_data) + * @flags: [in] flags + * + * Creates a new file by hooking it on a single inode. This is useful for files + * that do not need to have a full-fledged inode in order to operate correctly. + * All the files created with anon_inode_getfd() will share a single inode, + * hence saving memory and avoiding code duplication for the file/inode/dentry + * setup. Returns new descriptor or an error code. + */ +int anon_inode_getfd(const char *name, const struct file_operations *fops, + void *priv, int flags) +{ + int error, fd; + struct file *file; + + error = get_unused_fd_flags(flags); + if (error < 0) + return error; + fd = error; + + file = anon_inode_getfile(name, fops, priv, flags); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_put_unused_fd; + } fd_install(fd, file); return fd; -err_dput: - dput(dentry); err_put_unused_fd: put_unused_fd(fd); -err_module: - module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 2316e944a10..e947915109e 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -90,7 +90,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(path.dentry) && follow_down(&path)); + while (d_mountpoint(path.dentry) && follow_down(&path)) ; umount_ok = may_umount(path.mnt); path_put(&path); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 615d5496fe0..dd376c124e7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -842,7 +842,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = BEFS_SUPER_MAGIC; /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); - sb->s_op = (struct super_operations *) &befs_sops; + sb->s_op = &befs_sops; root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); if (IS_ERR(root)) { ret = PTR_ERR(root); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7c1e65d5487..442d94fe255 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1280,9 +1280,6 @@ static int writenote(struct memelfnote *men, struct file *file, #define DUMP_WRITE(addr, nr) \ if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(off) \ - if (!dump_seek(file, (off))) \ - goto end_coredump; static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) @@ -2016,7 +2013,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un goto end_coredump; /* Align to page */ - DUMP_SEEK(dataoff - foffset); + if (!dump_seek(file, dataoff - foffset)) + goto end_coredump; for (vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma)) { @@ -2027,33 +2025,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; - struct vm_area_struct *tmp_vma; - - if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &tmp_vma) <= 0) { - DUMP_SEEK(PAGE_SIZE); - } else { - if (page == ZERO_PAGE(0)) { - if (!dump_seek(file, PAGE_SIZE)) { - page_cache_release(page); - goto end_coredump; - } - } else { - void *kaddr; - flush_cache_page(tmp_vma, addr, - page_to_pfn(page)); - kaddr = kmap(page); - if ((size += PAGE_SIZE) > limit || - !dump_write(file, kaddr, - PAGE_SIZE)) { - kunmap(page); - page_cache_release(page); - goto end_coredump; - } - kunmap(page); - } + int stop; + + page = get_dump_page(addr); + if (page) { + void *kaddr = kmap(page); + stop = ((size += PAGE_SIZE) > limit) || + !dump_write(file, kaddr, PAGE_SIZE); + kunmap(page); page_cache_release(page); - } + } else + stop = !dump_seek(file, PAGE_SIZE); + if (stop) + goto end_coredump; } } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 20fbeced472..76285471073 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1325,9 +1325,6 @@ static int writenote(struct memelfnote *men, struct file *file) #define DUMP_WRITE(addr, nr) \ if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(off) \ - if (!dump_seek(file, (off))) \ - goto end_coredump; static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { @@ -1518,6 +1515,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, unsigned long *limit, unsigned long mm_flags) { struct vm_area_struct *vma; + int err = 0; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { unsigned long addr; @@ -1525,43 +1523,26 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, if (!maydump(vma, mm_flags)) continue; - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE - ) { - struct vm_area_struct *vma; - struct page *page; - - if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { - DUMP_SEEK(file->f_pos + PAGE_SIZE); - } - else if (page == ZERO_PAGE(0)) { - page_cache_release(page); - DUMP_SEEK(file->f_pos + PAGE_SIZE); - } - else { - void *kaddr; - - flush_cache_page(vma, addr, page_to_pfn(page)); - kaddr = kmap(page); - if ((*size += PAGE_SIZE) > *limit || - !dump_write(file, kaddr, PAGE_SIZE) - ) { - kunmap(page); - page_cache_release(page); - return -EIO; - } + for (addr = vma->vm_start; addr < vma->vm_end; + addr += PAGE_SIZE) { + struct page *page = get_dump_page(addr); + if (page) { + void *kaddr = kmap(page); + *size += PAGE_SIZE; + if (*size > *limit) + err = -EFBIG; + else if (!dump_write(file, kaddr, PAGE_SIZE)) + err = -EIO; kunmap(page); page_cache_release(page); - } + } else if (!dump_seek(file, file->f_pos + PAGE_SIZE)) + err = -EFBIG; + if (err) + goto out; } } - - return 0; - -end_coredump: - return -EFBIG; +out: + return err; } #endif @@ -1802,7 +1783,8 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, goto end_coredump; } - DUMP_SEEK(dataoff); + if (!dump_seek(file, dataoff)) + goto end_coredump; if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) goto end_coredump; diff --git a/fs/block_dev.c b/fs/block_dev.c index 71e7e03ac34..5d1ed50bd46 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1114,7 +1114,7 @@ EXPORT_SYMBOL(revalidate_disk); int check_disk_change(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; - struct block_device_operations * bdops = disk->fops; + const struct block_device_operations *bdops = disk->fops; if (!bdops->media_changed) return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8b819279001..6c4173146bb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -772,7 +772,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) } } -static struct address_space_operations btree_aops = { +static const struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, .writepages = btree_writepages, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe8..9096fd0ca3c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -55,13 +55,13 @@ struct btrfs_iget_args { struct btrfs_root *root; }; -static struct inode_operations btrfs_dir_inode_operations; -static struct inode_operations btrfs_symlink_inode_operations; -static struct inode_operations btrfs_dir_ro_inode_operations; -static struct inode_operations btrfs_special_inode_operations; -static struct inode_operations btrfs_file_inode_operations; -static struct address_space_operations btrfs_aops; -static struct address_space_operations btrfs_symlink_aops; +static const struct inode_operations btrfs_dir_inode_operations; +static const struct inode_operations btrfs_symlink_inode_operations; +static const struct inode_operations btrfs_dir_ro_inode_operations; +static const struct inode_operations btrfs_special_inode_operations; +static const struct inode_operations btrfs_file_inode_operations; +static const struct address_space_operations btrfs_aops; +static const struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; static struct extent_io_ops btrfs_extent_io_ops; @@ -5201,7 +5201,7 @@ static int btrfs_permission(struct inode *inode, int mask) return generic_permission(inode, mask, btrfs_check_acl); } -static struct inode_operations btrfs_dir_inode_operations = { +static const struct inode_operations btrfs_dir_inode_operations = { .getattr = btrfs_getattr, .lookup = btrfs_lookup, .create = btrfs_create, @@ -5219,7 +5219,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, }; -static struct inode_operations btrfs_dir_ro_inode_operations = { +static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, }; @@ -5259,7 +5259,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { * * For now we're avoiding this by dropping bmap. */ -static struct address_space_operations btrfs_aops = { +static const struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, @@ -5271,14 +5271,14 @@ static struct address_space_operations btrfs_aops = { .set_page_dirty = btrfs_set_page_dirty, }; -static struct address_space_operations btrfs_symlink_aops = { +static const struct address_space_operations btrfs_symlink_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, }; -static struct inode_operations btrfs_file_inode_operations = { +static const struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, @@ -5290,7 +5290,7 @@ static struct inode_operations btrfs_file_inode_operations = { .fallocate = btrfs_fallocate, .fiemap = btrfs_fiemap, }; -static struct inode_operations btrfs_special_inode_operations = { +static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, @@ -5299,7 +5299,7 @@ static struct inode_operations btrfs_special_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, }; -static struct inode_operations btrfs_symlink_inode_operations = { +static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6d6d06cb6df..2db17cd66fc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -51,7 +51,7 @@ #include "export.h" #include "compression.h" -static struct super_operations btrfs_super_ops; +static const struct super_operations btrfs_super_ops; static void btrfs_put_super(struct super_block *sb) { @@ -675,7 +675,7 @@ static int btrfs_unfreeze(struct super_block *sb) return 0; } -static struct super_operations btrfs_super_ops = { +static const struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c50..30c0d45c1b5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2605,7 +2605,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, extent); cs = btrfs_file_extent_offset(src, extent); cl = btrfs_file_extent_num_bytes(src, - extent);; + extent); if (btrfs_file_extent_compression(src, extent)) { cs = 0; diff --git a/fs/buffer.c b/fs/buffer.c index 90a98865b0c..209f7f15f5f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -52,6 +52,7 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) bh->b_end_io = handler; bh->b_private = private; } +EXPORT_SYMBOL(init_buffer); static int sync_buffer(void *word) { @@ -80,6 +81,7 @@ void unlock_buffer(struct buffer_head *bh) smp_mb__after_clear_bit(); wake_up_bit(&bh->b_state, BH_Lock); } +EXPORT_SYMBOL(unlock_buffer); /* * Block until a buffer comes unlocked. This doesn't stop it @@ -90,6 +92,7 @@ void __wait_on_buffer(struct buffer_head * bh) { wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL(__wait_on_buffer); static void __clear_page_buffers(struct page *page) @@ -144,6 +147,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) __end_buffer_read_notouch(bh, uptodate); put_bh(bh); } +EXPORT_SYMBOL(end_buffer_read_sync); void end_buffer_write_sync(struct buffer_head *bh, int uptodate) { @@ -164,6 +168,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) unlock_buffer(bh); put_bh(bh); } +EXPORT_SYMBOL(end_buffer_write_sync); /* * Various filesystems appear to want __find_get_block to be non-blocking. @@ -272,6 +277,7 @@ void invalidate_bdev(struct block_device *bdev) invalidate_bh_lrus(); invalidate_mapping_pages(mapping, 0, -1); } +EXPORT_SYMBOL(invalidate_bdev); /* * Kick pdflush then try to free up some ZONE_NORMAL memory. @@ -410,6 +416,7 @@ still_busy: local_irq_restore(flags); return; } +EXPORT_SYMBOL(end_buffer_async_write); /* * If a page's buffers are under async readin (end_buffer_async_read @@ -438,8 +445,8 @@ static void mark_buffer_async_read(struct buffer_head *bh) set_buffer_async_read(bh); } -void mark_buffer_async_write_endio(struct buffer_head *bh, - bh_end_io_t *handler) +static void mark_buffer_async_write_endio(struct buffer_head *bh, + bh_end_io_t *handler) { bh->b_end_io = handler; set_buffer_async_write(bh); @@ -553,7 +560,7 @@ repeat: return err; } -void do_thaw_all(struct work_struct *work) +static void do_thaw_all(struct work_struct *work) { struct super_block *sb; char b[BDEVNAME_SIZE]; @@ -1172,6 +1179,7 @@ void mark_buffer_dirty(struct buffer_head *bh) } } } +EXPORT_SYMBOL(mark_buffer_dirty); /* * Decrement a buffer_head's reference count. If all buffers against a page @@ -1188,6 +1196,7 @@ void __brelse(struct buffer_head * buf) } WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); } +EXPORT_SYMBOL(__brelse); /* * bforget() is like brelse(), except it discards any @@ -1206,6 +1215,7 @@ void __bforget(struct buffer_head *bh) } __brelse(bh); } +EXPORT_SYMBOL(__bforget); static struct buffer_head *__bread_slow(struct buffer_head *bh) { @@ -2218,6 +2228,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) } return 0; } +EXPORT_SYMBOL(block_read_full_page); /* utility function for filesystems that need to do work on expanding * truncates. Uses filesystem pagecache writes to allow the filesystem to @@ -2252,6 +2263,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) out: return err; } +EXPORT_SYMBOL(generic_cont_expand_simple); static int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) @@ -2352,6 +2364,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, out: return err; } +EXPORT_SYMBOL(cont_write_begin); int block_prepare_write(struct page *page, unsigned from, unsigned to, get_block_t *get_block) @@ -2362,6 +2375,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to, ClearPageUptodate(page); return err; } +EXPORT_SYMBOL(block_prepare_write); int block_commit_write(struct page *page, unsigned from, unsigned to) { @@ -2369,6 +2383,7 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) __block_commit_write(inode,page,from,to); return 0; } +EXPORT_SYMBOL(block_commit_write); /* * block_page_mkwrite() is not allowed to change the file size as it gets @@ -2426,6 +2441,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, out: return ret; } +EXPORT_SYMBOL(block_page_mkwrite); /* * nobh_write_begin()'s prereads are special: the buffer_heads are freed @@ -2849,6 +2865,7 @@ unlock: out: return err; } +EXPORT_SYMBOL(block_truncate_page); /* * The generic ->writepage function for buffer-backed address_spaces @@ -2890,6 +2907,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, zero_user_segment(page, offset, PAGE_CACHE_SIZE); return __block_write_full_page(inode, page, get_block, wbc, handler); } +EXPORT_SYMBOL(block_write_full_page_endio); /* * The generic ->writepage function for buffer-backed address_spaces @@ -2900,7 +2918,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, return block_write_full_page_endio(page, get_block, wbc, end_buffer_async_write); } - +EXPORT_SYMBOL(block_write_full_page); sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) @@ -2913,6 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } +EXPORT_SYMBOL(generic_block_bmap); static void end_bio_bh_io_sync(struct bio *bio, int err) { @@ -2982,6 +3001,7 @@ int submit_bh(int rw, struct buffer_head * bh) bio_put(bio); return ret; } +EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) @@ -3043,6 +3063,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) unlock_buffer(bh); } } +EXPORT_SYMBOL(ll_rw_block); /* * For a data-integrity writeout, we need to wait upon any in-progress I/O @@ -3071,6 +3092,7 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(sync_dirty_buffer); /* * try_to_free_buffers() checks if all the buffers on this particular page @@ -3185,6 +3207,7 @@ void block_sync_page(struct page *page) if (mapping) blk_run_backing_dev(mapping->backing_dev_info, page); } +EXPORT_SYMBOL(block_sync_page); /* * There are no bdflush tunables left. But distributions are @@ -3361,29 +3384,3 @@ void __init buffer_init(void) max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); hotcpu_notifier(buffer_cpu_notify, 0); } - -EXPORT_SYMBOL(__bforget); -EXPORT_SYMBOL(__brelse); -EXPORT_SYMBOL(__wait_on_buffer); -EXPORT_SYMBOL(block_commit_write); -EXPORT_SYMBOL(block_prepare_write); -EXPORT_SYMBOL(block_page_mkwrite); -EXPORT_SYMBOL(block_read_full_page); -EXPORT_SYMBOL(block_sync_page); -EXPORT_SYMBOL(block_truncate_page); -EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(block_write_full_page_endio); -EXPORT_SYMBOL(cont_write_begin); -EXPORT_SYMBOL(end_buffer_read_sync); -EXPORT_SYMBOL(end_buffer_write_sync); -EXPORT_SYMBOL(end_buffer_async_write); -EXPORT_SYMBOL(file_fsync); -EXPORT_SYMBOL(generic_block_bmap); -EXPORT_SYMBOL(generic_cont_expand_simple); -EXPORT_SYMBOL(init_buffer); -EXPORT_SYMBOL(invalidate_bdev); -EXPORT_SYMBOL(ll_rw_block); -EXPORT_SYMBOL(mark_buffer_dirty); -EXPORT_SYMBOL(submit_bh); -EXPORT_SYMBOL(sync_dirty_buffer); -EXPORT_SYMBOL(unlock_buffer); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 606912d8f2a..fea9e898c4b 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -142,7 +142,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, rc = dns_resolve_server_name_to_ip(*devname, &srvIP); if (rc != 0) { cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", - __func__, *devname, rc));; + __func__, *devname, rc)); goto compose_mount_options_err; } /* md_len = strlen(...) + 12 for 'sep+prefixpath=' @@ -385,7 +385,7 @@ out_err: goto out; } -struct inode_operations cifs_dfs_referral_inode_operations = { +const struct inode_operations cifs_dfs_referral_inode_operations = { .follow_link = cifs_dfs_follow_mountpoint, }; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3610e9958b4..d79ce2e95c2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -50,7 +50,7 @@ #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ #ifdef CONFIG_CIFS_QUOTA -static struct quotactl_ops cifs_quotactl_ops; +static const struct quotactl_ops cifs_quotactl_ops; #endif /* QUOTA */ int cifsFYI = 0; @@ -517,7 +517,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats) return rc; } -static struct quotactl_ops cifs_quotactl_ops = { +static const struct quotactl_ops cifs_quotactl_ops = { .set_xquota = cifs_xquota_set, .get_xquota = cifs_xquota_get, .set_xstate = cifs_xstate_set, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 094325e3f71..ac2b24c192f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -67,7 +67,7 @@ extern int cifs_setattr(struct dentry *, struct iattr *); extern const struct inode_operations cifs_file_inode_ops; extern const struct inode_operations cifs_symlink_inode_ops; -extern struct inode_operations cifs_dfs_referral_inode_operations; +extern const struct inode_operations cifs_dfs_referral_inode_operations; /* Functions related to files and directories */ diff --git a/fs/compat.c b/fs/compat.c index 6d6f98fe64a..3aa48834a22 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -100,13 +100,6 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st get_compat_timespec(&tv[1], &t[1])) return -EFAULT; - if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW) - && tv[0].tv_sec != 0) - return -EINVAL; - if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW) - && tv[1].tv_sec != 0) - return -EINVAL; - if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) return 0; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 75efb028974..d5f8c96964b 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -18,14 +18,13 @@ #include <linux/mount.h> #include <linux/tty.h> #include <linux/mutex.h> +#include <linux/magic.h> #include <linux/idr.h> #include <linux/devpts_fs.h> #include <linux/parser.h> #include <linux/fsnotify.h> #include <linux/seq_file.h> -#define DEVPTS_SUPER_MAGIC 0x1cd1 - #define DEVPTS_DEFAULT_MODE 0600 /* * ptmx is a new node in /dev/pts and will be unused in legacy (single- diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 1d1d2744223..1c8bb8c3a82 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -386,9 +386,9 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) return rv; } -static struct seq_operations format1_seq_ops; -static struct seq_operations format2_seq_ops; -static struct seq_operations format3_seq_ops; +static const struct seq_operations format1_seq_ops; +static const struct seq_operations format2_seq_ops; +static const struct seq_operations format3_seq_ops; static void *table_seq_start(struct seq_file *seq, loff_t *pos) { @@ -534,21 +534,21 @@ static void table_seq_stop(struct seq_file *seq, void *iter_ptr) } } -static struct seq_operations format1_seq_ops = { +static const struct seq_operations format1_seq_ops = { .start = table_seq_start, .next = table_seq_next, .stop = table_seq_stop, .show = table_seq_show, }; -static struct seq_operations format2_seq_ops = { +static const struct seq_operations format2_seq_ops = { .start = table_seq_start, .next = table_seq_next, .stop = table_seq_stop, .show = table_seq_show, }; -static struct seq_operations format3_seq_ops = { +static const struct seq_operations format3_seq_ops = { .start = table_seq_start, .next = table_seq_next, .stop = table_seq_stop, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 00b30a2d546..542f625312f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -582,7 +582,7 @@ extern const struct inode_operations ecryptfs_dir_iops; extern const struct inode_operations ecryptfs_symlink_iops; extern const struct super_operations ecryptfs_sops; extern const struct dentry_operations ecryptfs_dops; -extern struct address_space_operations ecryptfs_aops; +extern const struct address_space_operations ecryptfs_aops; extern int ecryptfs_verbosity; extern unsigned int ecryptfs_message_buf_len; extern signed long ecryptfs_message_wait_timeout; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 5c6bab9786e..05772aeaa8f 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -545,7 +545,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) return rc; } -struct address_space_operations ecryptfs_aops = { +const struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, .write_begin = ecryptfs_write_begin, diff --git a/fs/eventfd.c b/fs/eventfd.c index 31d12de83a2..8b47e4200e6 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -68,11 +68,16 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free_ctx(struct eventfd_ctx *ctx) +{ + kfree(ctx); +} + static void eventfd_free(struct kref *kref) { struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); - kfree(ctx); + eventfd_free_ctx(ctx); } /** @@ -298,9 +303,23 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) } EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); -SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) +/** + * eventfd_file_create - Creates an eventfd file pointer. + * @count: Initial eventfd counter value. + * @flags: Flags for the eventfd file. + * + * This function creates an eventfd file pointer, w/out installing it into + * the fd table. This is useful when the eventfd file is used during the + * initialization of data structures that require extra setup after the eventfd + * creation. So the eventfd creation is split into the file pointer creation + * phase, and the file descriptor installation phase. + * In this way races with userspace closing the newly installed file descriptor + * can be avoided. + * Returns an eventfd file pointer, or a proper error pointer. + */ +struct file *eventfd_file_create(unsigned int count, int flags) { - int fd; + struct file *file; struct eventfd_ctx *ctx; /* Check the EFD_* constants for consistency. */ @@ -308,26 +327,48 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); if (flags & ~EFD_FLAGS_SET) - return -EINVAL; + return ERR_PTR(-EINVAL); ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) - return -ENOMEM; + return ERR_PTR(-ENOMEM); kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; - /* - * When we call this, the initialization must be complete, since - * anon_inode_getfd() will install the fd. - */ - fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, - flags & EFD_SHARED_FCNTL_FLAGS); - if (fd < 0) - kfree(ctx); + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, + flags & EFD_SHARED_FCNTL_FLAGS); + if (IS_ERR(file)) + eventfd_free_ctx(ctx); + + return file; +} + +SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) +{ + int fd, error; + struct file *file; + + error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS); + if (error < 0) + return error; + fd = error; + + file = eventfd_file_create(count, flags); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_put_unused_fd; + } + fd_install(fd, file); + return fd; + +err_put_unused_fd: + put_unused_fd(fd); + + return error; } SYSCALL_DEFINE1(eventfd, unsigned int, count) diff --git a/fs/exec.c b/fs/exec.c index 172ceb6edde..5c833c18d0d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/pagemap.h> -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> @@ -845,6 +845,9 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: + if (current->mm) + setmax_mm_hiwater_rss(&sig->maxrss, current->mm); + exit_itimers(sig); flush_itimer_signals(); @@ -923,7 +926,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_counter_comm(tsk); + perf_event_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) @@ -997,7 +1000,7 @@ int flush_old_exec(struct linux_binprm * bprm) * security domain: */ if (!get_dumpable(current->mm)) - perf_counter_exit_task(current); + perf_event_exit_task(current); /* An exec changes our domain. We are no longer part of the thread group */ @@ -1354,6 +1357,8 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->stack_start = current->mm->start_stack; + /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 23701f289e9..dd7175ce560 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -70,7 +70,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str if (PTR_ERR(inode) == -ESTALE) { ext2_error(dir->i_sb, __func__, "deleted inode referenced: %lu", - ino); + (unsigned long) ino); return ERR_PTR(-EIO); } else { return ERR_CAST(inode); diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index b72b8588422..c18fbf3e406 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -20,7 +20,7 @@ __inode_direct_access(struct inode *inode, sector_t block, void **kaddr, unsigned long *pfn) { struct block_device *bdev = inode->i_sb->s_bdev; - struct block_device_operations *ops = bdev->bd_disk->fops; + const struct block_device_operations *ops = bdev->bd_disk->fops; sector_t sector; sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a8d80a7f110..72743d36050 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -720,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); -static struct dquot_operations ext3_quota_operations = { +static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, @@ -737,7 +737,7 @@ static struct dquot_operations ext3_quota_operations = { .destroy_dquot = dquot_destroy, }; -static struct quotactl_ops ext3_qctl_operations = { +static const struct quotactl_ops ext3_qctl_operations = { .quota_on = ext3_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4abd683b963..3a798737e30 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2337,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page, /* * Rest of the page in the page_vec * redirty then and skip then. We will - * try to to write them again after + * try to write them again after * starting a new transaction */ redirty_page_for_writepage(wbc, page); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a6b1ab73472..df539ba2777 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -964,7 +964,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); -static struct dquot_operations ext4_quota_operations = { +static const struct dquot_operations ext4_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, @@ -985,7 +985,7 @@ static struct dquot_operations ext4_quota_operations = { .destroy_dquot = dquot_destroy, }; -static struct quotactl_ops ext4_qctl_operations = { +static const struct quotactl_ops ext4_qctl_operations = { .quota_on = ext4_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 28c590b7c9d..8f1cfb02a6c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -179,7 +179,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) * always aligned to a 64 bit boundary. * * The size of the buffer is in bytes, but is it assumed that it is - * always ok to to read a complete multiple of 64 bits at the end + * always ok to read a complete multiple of 64 bits at the end * of the block in case the end is no aligned to a natural boundary. * * Return: the block number (bitmap buffer scope) that was found diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a93b885311d..eba6d552d9c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -31,12 +31,10 @@ #include <linux/statfs.h> #include <linux/security.h> #include <linux/ima.h> +#include <linux/magic.h> #include <asm/uaccess.h> -/* some random number */ -#define HUGETLBFS_MAGIC 0x958458f6 - static const struct super_operations hugetlbfs_ops; static const struct address_space_operations hugetlbfs_aops; const struct file_operations hugetlbfs_file_operations; @@ -507,6 +505,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; INIT_LIST_HEAD(&inode->i_mapping->private_list); info = HUGETLBFS_I(inode); + /* + * The policy is initialized here even if we are creating a + * private inode because initialization simply creates an + * an empty rb tree and calls spin_lock_init(), later when we + * call mpol_free_shared_policy() it will just return because + * the rb tree will still be empty. + */ mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: @@ -931,13 +936,19 @@ static struct file_system_type hugetlbfs_fs_type = { static struct vfsmount *hugetlbfs_vfsmount; -static int can_do_hugetlb_shm(void) +static int can_do_hugetlb_shm(int creat_flags) { - return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); + if (creat_flags != HUGETLB_SHMFS_INODE) + return 0; + if (capable(CAP_IPC_LOCK)) + return 1; + if (in_group_p(sysctl_hugetlb_shm_group)) + return 1; + return 0; } struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, - struct user_struct **user) + struct user_struct **user, int creat_flags) { int error = -ENOMEM; struct file *file; @@ -949,7 +960,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); - if (!can_do_hugetlb_shm()) { + if (!can_do_hugetlb_shm(creat_flags)) { *user = current_user(); if (user_shm_lock(size, *user)) { WARN_ONCE(1, diff --git a/fs/inode.c b/fs/inode.c index b2ba83d2c4e..76582b06ab9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/backing-dev.h> #include <linux/wait.h> +#include <linux/rwsem.h> #include <linux/hash.h> #include <linux/swap.h> #include <linux/security.h> @@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly; DEFINE_SPINLOCK(inode_lock); /* - * iprune_mutex provides exclusion between the kswapd or try_to_free_pages + * iprune_sem provides exclusion between the kswapd or try_to_free_pages * icache shrinking path, and the umount path. Without this exclusion, * by the time prune_icache calls iput for the inode whose pages it has * been invalidating, or by the time it calls clear_inode & destroy_inode * from its final dispose_list, the struct super_block they refer to * (for inode->i_sb->s_op) may already have been freed and reused. + * + * We make this an rwsem because the fastpath is icache shrinking. In + * some cases a filesystem may be doing a significant amount of work in + * its inode reclaim code, so this should improve parallelism. */ -static DEFINE_MUTEX(iprune_mutex); +static DECLARE_RWSEM(iprune_sem); /* * Statistics gathering.. @@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode) int inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; - static struct inode_operations empty_iops; + static const struct inode_operations empty_iops; static const struct file_operations empty_fops; struct address_space *const mapping = &inode->i_data; @@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) /* * We can reschedule here without worrying about the list's * consistency because the per-sb list of inodes must not - * change during umount anymore, and because iprune_mutex keeps + * change during umount anymore, and because iprune_sem keeps * shrink_icache_memory() away. */ cond_resched_lock(&inode_lock); @@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb) int busy; LIST_HEAD(throw_away); - mutex_lock(&iprune_mutex); + down_write(&iprune_sem); spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); fsnotify_unmount_inodes(&sb->s_inodes); @@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb) spin_unlock(&inode_lock); dispose_list(&throw_away); - mutex_unlock(&iprune_mutex); + up_write(&iprune_sem); return busy; } @@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan) int nr_scanned; unsigned long reap = 0; - mutex_lock(&iprune_mutex); + down_read(&iprune_sem); spin_lock(&inode_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan) spin_unlock(&inode_lock); dispose_list(&freeable); - mutex_unlock(&iprune_mutex); + up_read(&iprune_sem); } /* @@ -695,13 +700,15 @@ void unlock_new_inode(struct inode *inode) } #endif /* - * This is special! We do not need the spinlock - * when clearing I_LOCK, because we're guaranteed - * that nobody else tries to do anything about the - * state of the inode when it is locked, as we - * just created it (so there can be no old holders - * that haven't tested I_LOCK). + * This is special! We do not need the spinlock when clearing I_LOCK, + * because we're guaranteed that nobody else tries to do anything about + * the state of the inode when it is locked, as we just created it (so + * there can be no old holders that haven't tested I_LOCK). + * However we must emit the memory barrier so that other CPUs reliably + * see the clearing of I_LOCK after the other inode initialisation has + * completed. */ + smp_mb(); WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); inode->i_state &= ~(I_LOCK|I_NEW); wake_up_inode(inode); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a8a358bc0f2..53b86e16e5f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -768,7 +768,7 @@ static void jbd2_seq_history_stop(struct seq_file *seq, void *v) { } -static struct seq_operations jbd2_seq_history_ops = { +static const struct seq_operations jbd2_seq_history_ops = { .start = jbd2_seq_history_start, .next = jbd2_seq_history_next, .stop = jbd2_seq_history_stop, @@ -872,7 +872,7 @@ static void jbd2_seq_info_stop(struct seq_file *seq, void *v) { } -static struct seq_operations jbd2_seq_info_ops = { +static const struct seq_operations jbd2_seq_info_ops = { .start = jbd2_seq_info_start, .next = jbd2_seq_info_next, .stop = jbd2_seq_info_stop, diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0035c021395..9a80e8e595d 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -123,7 +123,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child) return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); } -static struct export_operations jffs2_export_ops = { +static const struct export_operations jffs2_export_ops = { .get_parent = jffs2_get_parent, .fh_to_dentry = jffs2_fh_to_dentry, .fh_to_parent = jffs2_fh_to_parent, diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d35..fc9032dc886 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 4336adba952..c81249fef11 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -458,7 +458,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) nlm_put_lockowner(fl->fl_u.nfs_fl.owner); } -static struct file_lock_operations nlmclnt_lock_ops = { +static const struct file_lock_operations nlmclnt_lock_ops = { .fl_copy_lock = nlmclnt_locks_copy_lock, .fl_release_private = nlmclnt_locks_release_private, }; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b4..4600c2037b8 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd..f956651d0f6 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e577a78d7ba..d1001790fa9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -705,7 +705,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; } -struct lock_manager_operations nlmsvc_lock_operations = { +const struct lock_manager_operations nlmsvc_lock_operations = { .fl_compare_owner = nlmsvc_same_owner, .fl_notify = nlmsvc_notify_blocked, .fl_grant = nlmsvc_grant_deferred, diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611..ad478da7ca6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/fs/locks.c b/fs/locks.c index 19ee18a6829..a8794f233bc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -434,7 +434,7 @@ static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try) return fl->fl_file == try->fl_file; } -static struct lock_manager_operations lease_manager_ops = { +static const struct lock_manager_operations lease_manager_ops = { .fl_break = lease_break_callback, .fl_release_private = lease_release_private_callback, .fl_mylease = lease_mylease_callback, diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d407e7a0b6f..6198731d7fc 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -308,14 +308,18 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) struct inode *inode = (struct inode*)mapping->host; char *kaddr = page_address(page); loff_t pos = page_offset(page) + (char*)de - kaddr; - unsigned len = minix_sb(inode->i_sb)->s_dirsize; + struct minix_sb_info *sbi = minix_sb(inode->i_sb); + unsigned len = sbi->s_dirsize; int err; lock_page(page); err = __minix_write_begin(NULL, mapping, pos, len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { - de->inode = 0; + if (sbi->s_version == MINIX_V3) + ((minix3_dirent *) de)->inode = 0; + else + de->inode = 0; err = dir_commit_chunk(page, pos, len); } else { unlock_page(page); @@ -440,7 +444,10 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page, err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { - de->inode = inode->i_ino; + if (sbi->s_version == MINIX_V3) + ((minix3_dirent *) de)->inode = inode->i_ino; + else + de->inode = inode->i_ino; err = dir_commit_chunk(page, pos, sbi->s_dirsize); } else { unlock_page(page); @@ -470,7 +477,14 @@ ino_t minix_inode_by_name(struct dentry *dentry) ino_t res = 0; if (de) { - res = de->inode; + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct minix_sb_info *sbi = minix_sb(inode->i_sb); + + if (sbi->s_version == MINIX_V3) + res = ((minix3_dirent *) de)->inode; + else + res = de->inode; dir_put_page(page); } return res; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 9c590722d87..b8b5b30d53f 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1241,7 +1241,7 @@ ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date) month = 2; } else { nl_day = (year & 3) || day <= 59 ? day : day - 1; - for (month = 0; month < 12; month++) + for (month = 1; month < 12; month++) if (day_n[month] > nl_day) break; } diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index fa038df63ac..53a7ed7eb9c 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -442,7 +442,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp, if (dentry) { struct inode* s_inode = dentry->d_inode; - if (inode) { + if (s_inode) { NCP_FINFO(s_inode)->volNumber = vnum; NCP_FINFO(s_inode)->dirEntNum = de; NCP_FINFO(s_inode)->DosDirNum = dosde; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e5a2dac5f71..76b0aa0f73b 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -222,7 +222,7 @@ static unsigned decode_sessionid(struct xdr_stream *xdr, p = read_buf(xdr, len); if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE);; + return htonl(NFS4ERR_RESOURCE); memcpy(sid->data, p, len); return 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e350bd6a233..152025358da 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -933,10 +933,6 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str goto out_error; nfs_server_set_fsinfo(server, &fsinfo); - error = bdi_init(&server->backing_dev_info); - if (error) - goto out_error; - /* Get some general file system info */ if (server->namelen == 0) { @@ -995,6 +991,12 @@ static struct nfs_server *nfs_alloc_server(void) return NULL; } + if (bdi_init(&server->backing_dev_info)) { + nfs_free_iostats(server->io_stats); + kfree(server); + return NULL; + } + return server; } @@ -1529,7 +1531,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_server_list_stop(struct seq_file *p, void *v); static int nfs_server_list_show(struct seq_file *m, void *v); -static struct seq_operations nfs_server_list_ops = { +static const struct seq_operations nfs_server_list_ops = { .start = nfs_server_list_start, .next = nfs_server_list_next, .stop = nfs_server_list_stop, @@ -1550,7 +1552,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_volume_list_stop(struct seq_file *p, void *v); static int nfs_volume_list_show(struct seq_file *m, void *v); -static struct seq_operations nfs_volume_list_ops = { +static const struct seq_operations nfs_volume_list_ops = { .start = nfs_volume_list_start, .next = nfs_volume_list_next, .stop = nfs_volume_list_stop, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1434080aefe..2ef4fecf398 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -638,7 +638,7 @@ static void nfs4_fl_release_lock(struct file_lock *fl) nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -static struct file_lock_operations nfs4_fl_lock_ops = { +static const struct file_lock_operations nfs4_fl_lock_ops = { .fl_copy_lock = nfs4_fl_copy_lock, .fl_release_private = nfs4_fl_release_lock, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index de935692d40..f1cc0587cfe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2190,8 +2190,8 @@ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); - bdi_unregister(&server->backing_dev_info); kill_anon_super(s); + bdi_unregister(&server->backing_dev_info); nfs_fscache_release_super_cookie(s); nfs_free_server(server); } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index d9462643155..c1c9e035d4a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rv) goto out; rv = check_nfsd_access(exp, rqstp); + if (rv) + fh_put(fhp); out: exp_put(exp); return rv; @@ -1515,7 +1517,7 @@ static int e_show(struct seq_file *m, void *p) return svc_export_show(m, &svc_export_cache, cp); } -struct seq_operations nfs_exports_op = { +const struct seq_operations nfs_exports_op = { .start = e_start, .next = e_next, .stop = e_stop, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 01d4ec1c88e..edf926e1062 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, return p; } -static __be32 * -encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, - struct svc_fh *fhp) -{ - p = encode_post_op_attr(cd->rqstp, p, fhp); - *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, fhp); - fh_put(fhp); - return p; -} - static int compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, const char *name, int namlen) @@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, dparent = cd->fh.fh_dentry; exp = cd->fh.fh_export; - fh_init(fhp, NFS3_FHSIZE); if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); if (dchild == dparent) { /* filesystem root - cannot return filehandle for ".." */ dput(dchild); - return 1; + return -ENOENT; } } else dchild = dget(dparent); } else dchild = lookup_one_len(name, dparent, namlen); if (IS_ERR(dchild)) - return 1; - if (d_mountpoint(dchild) || - fh_compose(fhp, exp, dchild, &cd->fh) != 0 || - !dchild->d_inode) - rv = 1; + return -ENOENT; + rv = -ENOENT; + if (d_mountpoint(dchild)) + goto out; + rv = fh_compose(fhp, exp, dchild, &cd->fh); + if (rv) + goto out; + if (!dchild->d_inode) + goto out; + rv = 0; +out: dput(dchild); return rv; } +__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +{ + struct svc_fh fh; + int err; + + fh_init(&fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, &fh, name, namlen); + if (err) { + *p++ = 0; + *p++ = 0; + goto out; + } + p = encode_post_op_attr(cd->rqstp, p, &fh); + *p++ = xdr_one; /* yes, a file handle follows */ + p = encode_fh(p, &fh); +out: + fh_put(&fh); + return p; +} + /* * Encode a directory entry. This one works for both normal readdir * and readdirplus. @@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, p = encode_entry_baggage(cd, p, name, namlen, ino); - /* throw in readdirplus baggage */ - if (plus) { - struct svc_fh fh; - - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { - *p++ = 0; - *p++ = 0; - } else - p = encode_entryplus_baggage(cd, p, &fh); - } + if (plus) + p = encode_entryplus_baggage(cd, p, name, namlen); num_entry_words = p - cd->buffer; } else if (cd->rqstp->rq_respages[pn+1] != NULL) { /* temporarily encode entry into next page, then move back to @@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, p1 = encode_entry_baggage(cd, p1, name, namlen, ino); - /* throw in readdirplus baggage */ - if (plus) { - struct svc_fh fh; - - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { - /* zero out the filehandle */ - *p1++ = 0; - *p1++ = 0; - } else - p1 = encode_entryplus_baggage(cd, p1, &fh); - } + if (plus) + p = encode_entryplus_baggage(cd, p1, name, namlen); /* determine entry word length and lengths to go in pages */ num_entry_words = p1 - tmp; diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 54b8b4140c8..725d02f210e 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, deny = ~pas.group & pas.other; if (deny) { ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; - ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->flag = eflag; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_GROUP; ace++; @@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, if (deny) { ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; - ace->access_mask = mask_from_posix(deny, flags); + ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; ace->who = pa->e_id; ace++; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7acec..24e8d78f8dd 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -43,25 +43,30 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svcsock.h> #include <linux/nfsd/nfsd.h> #include <linux/nfsd/state.h> #include <linux/sunrpc/sched.h> #include <linux/nfs4.h> +#include <linux/sunrpc/xprtsock.h> #define NFSDDBG_FACILITY NFSDDBG_PROC #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 +#define NFS4_STATEID_SIZE 16 /* Index of predefined Linux callback client operations */ enum { - NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_SEQUENCE, }; enum nfs_cb_opnum4 { OP_CB_RECALL = 4, + OP_CB_SEQUENCE = 11, }; #define NFS4_MAXTAGLEN 20 @@ -70,17 +75,29 @@ enum nfs_cb_opnum4 { #define NFS4_dec_cb_null_sz 0 #define cb_compound_enc_hdr_sz 4 #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + #define op_enc_sz 1 #define op_dec_sz 2 #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ 1 + enc_stateid_sz + \ enc_nfs4_fh_sz) #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ op_dec_sz) +struct nfs4_rpc_args { + void *args_op; + struct nfsd4_cb_sequence args_seq; +}; + /* * Generic encode routines from fs/nfs/nfs4xdr.c */ @@ -137,11 +154,13 @@ xdr_error: \ } while (0) struct nfs4_cb_compound_hdr { - int status; - u32 ident; + /* args */ + u32 ident; /* minorversion 0 only */ u32 nops; __be32 *nops_p; u32 minorversion; + /* res */ + int status; u32 taglen; char *tag; }; @@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, hdr->nops++; } +static void +encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + if (hdr->minorversion == 0) + return; + + RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); + + WRITE32(OP_CB_SEQUENCE); + WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(args->cbs_clp->cl_cb_seq_nr); + WRITE32(0); /* slotid, always 0 */ + WRITE32(0); /* highest slotid always 0 */ + WRITE32(0); /* cachethis always 0 */ + WRITE32(0); /* FIXME: support referring_call_lists */ + hdr->nops++; +} + static int nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) { @@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) } static int -nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, + struct nfs4_rpc_args *rpc_args) { struct xdr_stream xdr; + struct nfs4_delegation *args = rpc_args->args_op; struct nfs4_cb_compound_hdr hdr = { .ident = args->dl_ident, + .minorversion = rpc_args->args_seq.cbs_minorversion, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); + encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); encode_cb_recall(&xdr, args, &hdr); encode_cb_nops(&hdr); return 0; @@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) return 0; } +/* + * Our current back channel implmentation supports a single backchannel + * with a single slot. + */ +static int +decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, + struct rpc_rqst *rqstp) +{ + struct nfs4_sessionid id; + int status; + u32 dummy; + __be32 *p; + + if (res->cbs_minorversion == 0) + return 0; + + status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); + if (status) + return status; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. + */ + status = -ESERVERFAULT; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); + memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); + p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); + if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, + NFS4_MAX_SESSIONID_LEN)) { + dprintk("%s Invalid session id\n", __func__); + goto out; + } + READ32(dummy); + if (dummy != res->cbs_clp->cl_cb_seq_nr) { + dprintk("%s Invalid sequence number\n", __func__); + goto out; + } + READ32(dummy); /* slotid must be 0 */ + if (dummy != 0) { + dprintk("%s Invalid slotid\n", __func__); + goto out; + } + /* FIXME: process highest slotid and target highest slotid */ + status = 0; +out: + return status; +} + + static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) { @@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) } static int -nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, + struct nfsd4_cb_sequence *seq) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; @@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) status = decode_cb_compound_hdr(&xdr, &hdr); if (status) goto out; + if (seq) { + status = decode_cb_sequence(&xdr, seq, rqstp); + if (status) + goto out; + } status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); out: return status; @@ -377,16 +478,15 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), .to_retries = 0, }; struct rpc_create_args args = { - .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .protocol = XPRT_TRANSPORT_TCP, + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - + if (cb->cb_minorversion) { + args.bc_xprt = clp->cl_cb_xprt; + args.protocol = XPRT_TRANSPORT_BC_TCP; + } /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { @@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) -{ - struct auth_cred acred = { - .machine_cred = 1 - }; +static struct rpc_cred *callback_cred; - /* - * Note in the gss case this doesn't actually have to wait for a - * gss upcall (or any calls to the client); this just creates a - * non-uptodate cred which the rpc state machine will fill in with - * a refresh_upcall later. - */ - return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, - RPCAUTH_LOOKUP_NEW); +int set_callback_cred(void) +{ + callback_cred = rpc_lookup_machine_cred(); + if (!callback_cred) + return -ENOMEM; + return 0; } + void do_probe_callback(struct nfs4_client *clp) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, + .rpc_cred = callback_cred }; - struct rpc_cred *cred; int status; - cred = lookup_cb_cred(cb); - if (IS_ERR(cred)) { - status = PTR_ERR(cred); - goto out; - } - cb->cb_cred = cred; - msg.rpc_cred = cb->cb_cred; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, &nfsd4_cb_probe_ops, (void *)clp); -out: if (status) { warn_no_callback_path(clp, status); put_nfs4_client(clp); @@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp) do_probe_callback(clp); } +/* + * There's currently a single callback channel slot. + * If the slot is available, then mark it busy. Otherwise, set the + * thread for sleeping on the callback RPC wait queue. + */ +static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, + struct rpc_task *task) +{ + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; + u32 *ptr = (u32 *)clp->cl_sessionid.data; + int status = 0; + + dprintk("%s: %u:%u:%u:%u\n", __func__, + ptr[0], ptr[1], ptr[2], ptr[3]); + + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); + dprintk("%s slot is busy\n", __func__); + status = -EAGAIN; + goto out; + } + + /* + * We'll need the clp during XDR encoding and decoding, + * and the sequence during decoding to verify the reply + */ + args->args_seq.cbs_clp = clp; + task->tk_msg.rpc_resp = &args->args_seq; + +out: + dprintk("%s status=%d\n", __func__, status); + return status; +} + +/* + * TODO: cb_sequence should support referring call lists, cachethis, multiple + * slots, and mark callback channel down on communication errors. + */ +static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; + u32 minorversion = clp->cl_cb_conn.cb_minorversion; + int status = 0; + + args->args_seq.cbs_minorversion = minorversion; + if (minorversion) { + status = nfsd41_cb_setup_sequence(clp, task); + if (status) { + if (status != -EAGAIN) { + /* terminate rpc task */ + task->tk_status = status; + task->tk_action = NULL; + } + return; + } + } + rpc_call_start(task); +} + +static void nfsd4_cb_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; + + dprintk("%s: minorversion=%d\n", __func__, + clp->cl_cb_conn.cb_minorversion); + + if (clp->cl_cb_conn.cb_minorversion) { + /* No need for lock, access serialized in nfsd4_cb_prepare */ + ++clp->cl_cb_seq_nr; + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + dprintk("%s: freed slot, new seqid=%d\n", __func__, + clp->cl_cb_seq_nr); + + /* We're done looking into the sequence information */ + task->tk_msg.rpc_resp = NULL; + } +} + static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfs4_delegation *dp = calldata; struct nfs4_client *clp = dp->dl_client; + nfsd4_cb_done(task, calldata); + switch (task->tk_status) { case -EIO: /* Network partition? */ @@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) break; default: /* success, or error we can't handle */ - return; + goto done; } if (dp->dl_retries--) { rpc_delay(task, 2*HZ); task->tk_status = 0; rpc_restart_call(task); + return; } else { atomic_set(&clp->cl_cb_conn.cb_set, 0); warn_no_callback_path(clp, task->tk_status); } +done: + kfree(task->tk_msg.rpc_argp); } static void nfsd4_cb_recall_release(void *calldata) @@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata) } static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_prepare = nfsd4_cb_prepare, .rpc_call_done = nfsd4_cb_recall_done, .rpc_release = nfsd4_cb_recall_release, }; @@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; + struct nfs4_rpc_args *args; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_argp = dp, - .rpc_cred = clp->cl_cb_conn.cb_cred + .rpc_cred = callback_cred }; - int status; + int status = -ENOMEM; + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + goto out; + args->args_op = dp; + msg.rpc_argp = args; dp->dl_retries = 1; status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); +out: if (status) { + kfree(args); put_nfs4_client(clp); nfs4_put_delegation(dp); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7c8801769a3..bebc0c2e1b0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, u32 *bmval, u32 *writable) { struct dentry *dentry = cstate->current_fh.fh_dentry; - struct svc_export *exp = cstate->current_fh.fh_export; /* * Check about attributes are supported by the NFSv4 server or not. @@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_attrnotsupp; /* - * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported + * Check FATTR4_WORD0_ACL can be supported * in current environment or not. */ if (bmval[0] & FATTR4_WORD0_ACL) { if (!IS_POSIXACL(dentry->d_inode)) return nfserr_attrnotsupp; } - if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) { - if (exp->ex_fslocs.locations == NULL) - return nfserr_attrnotsupp; - } /* * According to spec, read-only attributes return ERR_INVAL. @@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp, return status; } +static int +is_create_with_attrs(struct nfsd4_open *open) +{ + return open->op_create == NFS4_OPEN_CREATE + && (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED + || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); +} + +/* + * if error occurs when setting the acl, just clear the acl bit + * in the returned attr bitmap. + */ +static void +do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl, u32 *bmval) +{ + __be32 status; + + status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); + if (status) + /* + * We should probably fail the whole open at this point, + * but we've already created the file, so it's too late; + * So this seems the least of evils: + */ + bmval[0] &= ~FATTR4_WORD0_ACL; +} + static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o if (status) goto out; + if (is_create_with_attrs(open) && open->op_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); + set_change_info(&open->op_cinfo, current_fh); fh_dup2(current_fh, &resfh); @@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_badtype; } - if (!status) { - fh_unlock(&cstate->current_fh); - set_change_info(&create->cr_cinfo, &cstate->current_fh); - fh_dup2(&cstate->current_fh, &resfh); - } + if (status) + goto out; + + if (create->cr_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, + create->cr_bmval); + fh_unlock(&cstate->current_fh); + set_change_info(&create->cr_cinfo, &cstate->current_fh); + fh_dup2(&cstate->current_fh, &resfh); +out: fh_put(&resfh); return status; } @@ -947,34 +979,6 @@ static struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); /* - * This is a replay of a compound for which no cache entry pages - * were used. Encode the sequence operation, and if cachethis is FALSE - * encode the uncache rep error on the next operation. - */ -static __be32 -nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, - struct nfsd4_compoundres *resp) -{ - struct nfsd4_op *op; - - dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, - resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); - - /* Encode the replayed sequence operation */ - BUG_ON(resp->opcnt != 1); - op = &args->ops[resp->opcnt - 1]; - nfsd4_encode_operation(resp, op); - - /*return nfserr_retry_uncached_rep in next operation. */ - if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { - op = &args->ops[resp->opcnt++]; - op->status = nfserr_retry_uncached_rep; - nfsd4_encode_operation(resp, op); - } - return op->status; -} - -/* * Enforce NFSv4.1 COMPOUND ordering rules. * * TODO: @@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); - if (nfsd4_not_cached(resp)) - status = nfsd4_enc_uncached_replay(args, resp); - else - status = op->status; + status = op->status; goto out; } if (op->status == nfserr_replay_me) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c..2153f9bdbeb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include <linux/lockd/bind.h> #include <linux/module.h> #include <linux/sunrpc/svcauth_gss.h> +#include <linux/sunrpc/clnt.h> #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -413,36 +414,65 @@ gen_sessionid(struct nfsd4_session *ses) } /* - * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * The protocol defines ca_maxresponssize_cached to include the size of + * the rpc header, but all we need to cache is the data starting after + * the end of the initial SEQUENCE operation--the rest we regenerate + * each time. Therefore we can advertise a ca_maxresponssize_cached + * value that is the number of bytes in our cache plus a few additional + * bytes. In order to stay on the safe side, and not promise more than + * we can cache, those additional bytes must be the minimum possible: 24 + * bytes of rpc header (xid through accept state, with AUTH_NULL + * verifier), 12 for the compound header (with zero-length tag), and 44 + * for the SEQUENCE op response: + */ +#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) + +/* + * Give the client the number of ca_maxresponsesize_cached slots it + * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, + * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more + * than NFSD_MAX_SLOTS_PER_SESSION. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) + * re-negotiate active sessions and reduce their slot usage to make + * rooom for new connections. For now we just fail the create session. */ -static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) +static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) { - int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + int mem, size = fchan->maxresp_cached; if (fchan->maxreqs < 1) return nfserr_inval; - else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + if (size < NFSD_MIN_HDR_SEQ_SZ) + size = NFSD_MIN_HDR_SEQ_SZ; + size -= NFSD_MIN_HDR_SEQ_SZ; + if (size > NFSD_SLOT_CACHE_SIZE) + size = NFSD_SLOT_CACHE_SIZE; + + /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ + mem = fchan->maxreqs * size; + if (mem > NFSD_MAX_MEM_PER_SESSION) { + fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; + if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; + mem = fchan->maxreqs * size; + } - if (np <= 0) { - status = nfserr_resource; - fchan->maxreqs = 0; - } else - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + spin_lock(&nfsd_drc_lock); + /* bound the total session drc memory ussage */ + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { + fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; + mem = fchan->maxreqs * size; + } + nfsd_drc_mem_used += mem; + spin_unlock(&nfsd_drc_lock); - return status; + if (fchan->maxreqs == 0) + return nfserr_serverfault; + + fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; + return 0; } /* @@ -466,36 +496,41 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; - fchan->maxresp_cached = session_fchan->maxresp_cached; - /* Use the client's maxops if possible */ if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; session_fchan->maxops = fchan->maxops; - /* try to use the client requested number of slots */ - if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - /* FIXME: Error means no more DRC pages so the server should * recover pages from existing sessions. For now fail session * creation. */ - status = set_forechannel_maxreqs(fchan); + status = set_forechannel_drc_size(fchan); + session_fchan->maxresp_cached = fchan->maxresp_cached; session_fchan->maxreqs = fchan->maxreqs; + + dprintk("%s status %d\n", __func__, status); return status; } +static void +free_session_slots(struct nfsd4_session *ses) +{ + int i; + + for (i = 0; i < ses->se_fchannel.maxreqs; i++) + kfree(ses->se_slots[i]); +} + static int alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new, tmp; - int idx, status = nfserr_resource, slotsize; + struct nfsd4_slot *sp; + int idx, slotsize, cachesize, i; + int status; memset(&tmp, 0, sizeof(tmp)); @@ -506,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, if (status) goto out; - /* allocate struct nfsd4_session and slot table in one piece */ - slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + + status = nfserr_serverfault; + /* allocate struct nfsd4_session and slot table pointers in one piece */ + slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); if (!new) goto out; memcpy(new, &tmp, sizeof(*new)); + /* allocate each struct nfsd4_slot and data cache in one piece */ + cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + for (i = 0; i < new->se_fchannel.maxreqs; i++) { + sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); + if (!sp) + goto out_free; + new->se_slots[i] = sp; + } + new->se_client = clp; gen_sessionid(new); idx = hash_sessionid(&new->se_sessionid); @@ -530,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, status = nfs_ok; out: return status; +out_free: + free_session_slots(new); + kfree(new); + goto out; } /* caller must hold sessionid_lock */ @@ -572,19 +624,16 @@ release_session(struct nfsd4_session *ses) nfsd4_put_session(ses); } -static void nfsd4_release_respages(struct page **respages, short resused); - void free_session(struct kref *kref) { struct nfsd4_session *ses; - int i; ses = container_of(kref, struct nfsd4_session, se_ref); - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { - struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; - nfsd4_release_respages(e->ce_respages, e->ce_resused); - } + spin_lock(&nfsd_drc_lock); + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; + spin_unlock(&nfsd_drc_lock); + free_session_slots(ses); kfree(ses); } @@ -647,18 +696,14 @@ shutdown_callback_client(struct nfs4_client *clp) clp->cl_cb_conn.cb_client = NULL; rpc_shutdown_client(clnt); } - if (clp->cl_cb_conn.cb_cred) { - put_rpccred(clp->cl_cb_conn.cb_cred); - clp->cl_cb_conn.cb_cred = NULL; - } } static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); + if (clp->cl_cb_xprt) + svc_xprt_put(clp->cl_cb_xprt); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -714,25 +759,6 @@ expire_client(struct nfs4_client *clp) put_nfs4_client(clp); } -static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) -{ - struct nfs4_client *clp; - - clp = alloc_client(name); - if (clp == NULL) - return NULL; - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_count, 1); - atomic_set(&clp->cl_cb_conn.cb_set, 0); - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_sessions); - INIT_LIST_HEAD(&clp->cl_lru); - return clp; -} - static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) { memcpy(target->cl_verifier.data, source->data, @@ -795,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp) *p++ = i++; } +static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, + struct svc_rqst *rqstp, nfs4_verifier *verf) +{ + struct nfs4_client *clp; + struct sockaddr *sa = svc_addr(rqstp); + char *princ; + + clp = alloc_client(name); + if (clp == NULL) + return NULL; + + princ = svc_gss_principal(rqstp); + if (princ) { + clp->cl_principal = kstrdup(princ, GFP_KERNEL); + if (clp->cl_principal == NULL) { + free_client(clp); + return NULL; + } + } + + memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); + atomic_set(&clp->cl_count, 1); + atomic_set(&clp->cl_cb_conn.cb_set, 0); + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_strhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_sessions); + INIT_LIST_HEAD(&clp->cl_lru); + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); + copy_verf(clp, verf); + rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + clp->cl_flavor = rqstp->rq_flavor; + copy_cred(&clp->cl_cred, &rqstp->rq_cred); + gen_confirm(clp); + + return clp; +} + static int check_name(struct xdr_netobj name) { if (name.len == 0) @@ -902,93 +968,40 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n * 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; - - /* Currently, we only support tcp for the callback channel */ - if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) + unsigned short expected_family; + + /* Currently, we only support tcp and tcp6 for the callback channel */ + if (se->se_callback_netid_len == 3 && + !memcmp(se->se_callback_netid_val, "tcp", 3)) + expected_family = AF_INET; + else if (se->se_callback_netid_len == 4 && + !memcmp(se->se_callback_netid_val, "tcp6", 4)) + expected_family = AF_INET6; + else goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); @@ -996,175 +1009,87 @@ out_err: return; } -void -nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - - resp->cstate.statp = statp; -} - /* - * Dereference the result pages. + * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. */ -static void -nfsd4_release_respages(struct page **respages, short resused) +void +nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - int i; + struct nfsd4_slot *slot = resp->cstate.slot; + unsigned int base; - dprintk("--> %s\n", __func__); - for (i = 0; i < resused; i++) { - if (!respages[i]) - continue; - put_page(respages[i]); - respages[i] = NULL; - } -} + dprintk("--> %s slot %p\n", __func__, slot); -static void -nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) -{ - int i; + slot->sl_opcnt = resp->opcnt; + slot->sl_status = resp->cstate.status; - for (i = 0; i < count; i++) { - topages[i] = frompages[i]; - if (!topages[i]) - continue; - get_page(topages[i]); + if (nfsd4_not_cached(resp)) { + slot->sl_datalen = 0; + return; } + slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; + base = (char *)resp->cstate.datap - + (char *)resp->xbuf->head[0].iov_base; + if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, + slot->sl_datalen)) + WARN("%s: sessions DRC could not cache compound\n", __func__); + return; } /* - * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous - * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total - * length of the XDR response is less than se_fmaxresp_cached - * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a - * of the reply (e.g. readdir). + * Encode the replay sequence operation from the slot values. + * If cachethis is FALSE encode the uncached rep error on the next + * operation which sets resp->p and increments resp->opcnt for + * nfs4svc_encode_compoundres. * - * Store the base and length of the rq_req.head[0] page - * of the NFSv4.1 data, just past the rpc header. */ -void -nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) +static __be32 +nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) { - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; - struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; - struct kvec *resv = &rqstp->rq_res.head[0]; - - dprintk("--> %s entry %p\n", __func__, entry); - - /* Don't cache a failed OP_SEQUENCE. */ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; + struct nfsd4_op *op; + struct nfsd4_slot *slot = resp->cstate.slot; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); - entry->ce_opcnt = resp->opcnt; - entry->ce_status = resp->cstate.status; + dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, + resp->opcnt, resp->cstate.slot->sl_cachethis); - /* - * Don't need a page to cache just the sequence operation - the slot - * does this for us! - */ + /* Encode the replayed sequence operation */ + op = &args->ops[resp->opcnt - 1]; + nfsd4_encode_operation(resp, op); - if (nfsd4_not_cached(resp)) { - entry->ce_resused = 0; - entry->ce_rpchdrlen = 0; - dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, - resp->cstate.slot->sl_cache_entry.ce_cachethis); - return; - } - entry->ce_resused = rqstp->rq_resused; - if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) - entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; - nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, - entry->ce_resused); - entry->ce_datav.iov_base = resp->cstate.statp; - entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0])); - /* Current request rpc header length*/ - entry->ce_rpchdrlen = (char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0]); -} - -/* - * We keep the rpc header, but take the nfs reply from the replycache. - */ -static int -nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, - struct nfsd4_cache_entry *entry) -{ - struct svc_rqst *rqstp = resp->rqstp; - struct kvec *resv = &resp->rqstp->rq_res.head[0]; - int len; - - /* Current request rpc header length*/ - len = (char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0]); - if (entry->ce_datav.iov_len + len > PAGE_SIZE) { - dprintk("%s v41 cached reply too large (%Zd).\n", __func__, - entry->ce_datav.iov_len); - return 0; + /* Return nfserr_retry_uncached_rep in next operation. */ + if (args->opcnt > 1 && slot->sl_cachethis == 0) { + op = &args->ops[resp->opcnt++]; + op->status = nfserr_retry_uncached_rep; + nfsd4_encode_operation(resp, op); } - /* copy the cached reply nfsd data past the current rpc header */ - memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, - entry->ce_datav.iov_len); - resv->iov_len = len + entry->ce_datav.iov_len; - return 1; + return op->status; } /* - * Keep the first page of the replay. Copy the NFSv4.1 data from the first - * cached page. Replace any futher replay pages from the cache. + * The sequence operation is not cached because we can use the slot and + * session values. */ __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + struct nfsd4_slot *slot = resp->cstate.slot; __be32 status; - dprintk("--> %s entry %p\n", __func__, entry); - - /* - * If this is just the sequence operation, we did not keep - * a page in the cache entry because we can just use the - * slot info stored in struct nfsd4_sequence that was checked - * against the slot in nfsd4_sequence(). - * - * This occurs when seq->cachethis is FALSE, or when the client - * session inactivity timer fires and a solo sequence operation - * is sent (lease renewal). - */ - if (seq && nfsd4_not_cached(resp)) { - seq->maxslots = resp->cstate.session->se_fchannel.maxreqs; - return nfs_ok; - } - - if (!nfsd41_copy_replay_data(resp, entry)) { - /* - * Not enough room to use the replay rpc header, send the - * cached header. Release all the allocated result pages. - */ - svc_free_res_pages(resp->rqstp); - nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, - entry->ce_resused); - } else { - /* Release all but the first allocated result page */ + dprintk("--> %s slot %p\n", __func__, slot); - resp->rqstp->rq_resused--; - svc_free_res_pages(resp->rqstp); + /* Either returns 0 or nfserr_retry_uncached */ + status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); + if (status == nfserr_retry_uncached_rep) + return status; - nfsd4_copy_pages(&resp->rqstp->rq_respages[1], - &entry->ce_respages[1], - entry->ce_resused - 1); - } + /* The sequence operation has been encoded, cstate->datap set. */ + memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); - resp->rqstp->rq_resused = entry->ce_resused; - resp->opcnt = entry->ce_opcnt; - resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; - status = entry->ce_status; + resp->opcnt = slot->sl_opcnt; + resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + status = slot->sl_status; return status; } @@ -1194,13 +1119,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1281,28 +1208,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, out_new: /* Normal case */ - new = create_client(exid->clname, dname); + new = create_client(exid->clname, dname, rqstp, &verf); if (new == NULL) { - status = nfserr_resource; + status = nfserr_serverfault; goto out; } - copy_verf(new, &verf); - copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; gen_clid(new); - gen_confirm(new); add_to_unconfirmed(new, strhashval); out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1313,40 +1235,60 @@ error: } static int -check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) +check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) { - dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, - slot->sl_seqid); + dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, + slot_seqid); /* The slot is in use, and no response has been sent. */ - if (slot->sl_inuse) { - if (seqid == slot->sl_seqid) + if (slot_inuse) { + if (seqid == slot_seqid) return nfserr_jukebox; else return nfserr_seq_misordered; } /* Normal */ - if (likely(seqid == slot->sl_seqid + 1)) + if (likely(seqid == slot_seqid + 1)) return nfs_ok; /* Replay */ - if (seqid == slot->sl_seqid) + if (seqid == slot_seqid) return nfserr_replay_cache; /* Wraparound */ - if (seqid == 1 && (slot->sl_seqid + 1) == 0) + if (seqid == 1 && (slot_seqid + 1) == 0) return nfs_ok; /* Misordered replay or misordered new request */ return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1354,40 +1296,38 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1396,6 +1336,19 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; + if (cr_ses->flags & SESSION4_BACK_CHAN) { + unconf->cl_cb_xprt = rqstp->rq_xprt; + svc_xprt_get(unconf->cl_cb_xprt); + rpc_copy_addr( + (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, + sa); + unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + unconf->cl_cb_conn.cb_minorversion = + cstate->minorversion; + unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; + unconf->cl_cb_seq_nr = 1; + nfsd4_probe_callback(unconf); + } conf = unconf; } else { status = nfserr_stale_clientid; @@ -1408,12 +1361,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -1478,18 +1430,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (seq->slotid >= session->se_fchannel.maxreqs) goto out; - slot = &session->se_slots[seq->slotid]; + slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); - status = check_slot_seqid(seq->seqid, slot); + /* We do not negotiate the number of slots yet, so set the + * maxslots to the session maxreqs which is used to encode + * sr_highest_slotid and the sr_target_slot id to maxslots */ + seq->maxslots = session->se_fchannel.maxreqs; + + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); if (status == nfserr_replay_cache) { cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status - * for nfsd4_svc_encode_compoundres processing */ + * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); cstate->status = nfserr_replay_cache; - goto replay_cache; + goto out; } if (status) goto out; @@ -1497,23 +1454,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, /* Success! bump slot seqid */ slot->sl_inuse = true; slot->sl_seqid = seq->seqid; - slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; + slot->sl_cachethis = seq->cachethis; cstate->slot = slot; cstate->session = session; -replay_cache: - /* Renew the clientid on success and on replay. - * Hold a session reference until done processing the compound: + /* Hold a session reference until done processing the compound: * nfsd4_put_session called only if the cstate slot is set. */ - renew_client(session->se_client); nfsd4_get_session(session); out: spin_unlock(&sessionid_lock); + /* Renew the clientid on success and on replay */ + if (cstate->session) { + nfs4_lock_state(); + renew_client(session->se_client); + nfs4_unlock_state(); + } dprintk("%s: return %d\n", __func__, ntohl(status)); return status; } @@ -1522,7 +1479,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1531,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; - char *princ; char dname[HEXDIR_LEN]; if (!check_name(clname)) @@ -1554,8 +1510,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1573,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, */ if (unconf) expire_client(unconf); - new = create_client(clname, dname); + new = create_client(clname, dname, rqstp, &clverifier); if (new == NULL) goto out; gen_clid(new); @@ -1590,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, */ expire_client(unconf); } - new = create_client(clname, dname); + new = create_client(clname, dname, rqstp, &clverifier); if (new == NULL) goto out; copy_clid(new, conf); @@ -1600,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * probable client reboot; state will be removed if * confirmed. */ - new = create_client(clname, dname); + new = create_client(clname, dname, rqstp, &clverifier); if (new == NULL) goto out; gen_clid(new); @@ -1611,25 +1570,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * confirmed. */ expire_client(unconf); - new = create_client(clname, dname); + new = create_client(clname, dname, rqstp, &clverifier); if (new == NULL) goto out; gen_clid(new); } - copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; - new->cl_flavor = rqstp->rq_flavor; - princ = svc_gss_principal(rqstp); - if (princ) { - new->cl_principal = kstrdup(princ, GFP_KERNEL); - if (new->cl_principal == NULL) { - free_client(new); - goto out; - } - } - copy_cred(&new->cl_cred, &rqstp->rq_cred); - gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; @@ -1651,7 +1597,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; @@ -1670,9 +1616,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* @@ -2163,7 +2109,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) return -EAGAIN; } -static struct lock_manager_operations nfsd_lease_mng_ops = { +static const struct lock_manager_operations nfsd_lease_mng_ops = { .fl_break = nfsd_break_deleg_cb, .fl_release_private = nfsd_release_deleg_cb, .fl_copy_lock = nfsd_copy_lock_deleg_cb, @@ -3368,7 +3314,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) /* Hack!: For now, we're defining this just so we can use a pointer to it * as a unique cookie to identify our (NFSv4's) posix locks. */ -static struct lock_manager_operations nfsd_posix_mng_ops = { +static const struct lock_manager_operations nfsd_posix_mng_ops = { }; static inline void @@ -4072,7 +4018,7 @@ set_max_delegations(void) /* initialization to perform when the nfsd service is started: */ -static void +static int __nfs4_state_start(void) { unsigned long grace_time; @@ -4084,19 +4030,26 @@ __nfs4_state_start(void) printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); laundry_wq = create_singlethread_workqueue("nfsd4"); + if (laundry_wq == NULL) + return -ENOMEM; queue_delayed_work(laundry_wq, &laundromat_work, grace_time); set_max_delegations(); + return set_callback_cred(); } -void +int nfs4_state_start(void) { + int ret; + if (nfs4_init) - return; + return 0; nfsd4_load_reboot_recovery_data(); - __nfs4_state_start(); + ret = __nfs4_state_start(); + if (ret) + return ret; nfs4_init = 1; - return; + return 0; } time_t diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6f..0fbd50cee1f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) { struct svc_fh tmp_fh; - char *path, *rootpath; + char *path = NULL, *rootpath; + size_t rootlen; fh_init(&tmp_fh, NFS4_FHSIZE); *stat = exp_pseudoroot(rqstp, &tmp_fh); @@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * path = exp->ex_pathname; - if (strncmp(path, rootpath, strlen(rootpath))) { + rootlen = strlen(rootpath); + if (strncmp(path, rootpath, rootlen)) { dprintk("nfsd: fs_locations failed;" "%s is not contained in %s\n", path, rootpath); *stat = nfserr_notsupp; - return NULL; + path = NULL; + goto out; } - - return path + strlen(rootpath); + path += rootlen; +out: + fh_put(&tmp_fh); + return path; } /* @@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } } - if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - if (exp->ex_fslocs.locations == NULL) { - bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; - } - } if ((buflen -= 16) < 0) goto out_resource; @@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_resource; if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; - if (!exp->ex_fslocs.locations) - word0 &= ~FATTR4_WORD0_FS_LOCATIONS; if (!word2) { WRITE32(2); WRITE32(word0); @@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, WRITE32(0); ADJUST_ARGS(); + resp->cstate.datap = p; /* DRC cache data pointer */ return 0; } @@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) return status; session = resp->cstate.session; - if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) + if (session == NULL || slot->sl_cachethis == 0) return status; if (resp->opcnt >= args->opcnt) @@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... */ + struct nfsd4_compound_state *cs = &resp->cstate; struct kvec *iov; p = resp->tagp; *p++ = htonl(resp->taglen); @@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.head[0]; iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); - if (nfsd4_has_session(&resp->cstate)) { - if (resp->cstate.status == nfserr_replay_cache && - !nfsd4_not_cached(resp)) { - iov->iov_len = resp->cstate.iovlen; - } else { - nfsd4_store_cache_entry(resp); - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - resp->cstate.slot->sl_inuse = 0; - } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); + resp->cstate.slot->sl_inuse = false; + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7e906c5b767..00388d2a3c9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -174,12 +174,13 @@ static const struct file_operations exports_operations = { }; extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); +extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); static struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = nfsd_pool_stats_release, .owner = THIS_MODULE, }; @@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) size -= len; mesg += len; } - - mutex_unlock(&nfsd_mutex); - return (mesg-buf); - + rv = mesg - buf; out_free: kfree(nthreads); mutex_unlock(&nfsd_mutex); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8847f3fbfc1..01965b2f3a7 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry, fh->ofh_dirino = 0; } -__be32 -fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, - struct svc_fh *ref_fh) +static bool is_root_export(struct svc_export *exp) { - /* ref_fh is a reference file handle. - * if it is non-null and for the same filesystem, then we should compose - * a filehandle which is of the same version, where possible. - * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * - */ + return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; +} - u8 version; - u8 fsid_type = 0; - struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; - __u32 *datap; - dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev; - int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root); +static struct super_block *exp_sb(struct svc_export *exp) +{ + return exp->ex_path.dentry->d_inode->i_sb; +} - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", - MAJOR(ex_dev), MINOR(ex_dev), - (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, - (inode ? inode->i_ino : 0)); +static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) +{ + switch (fsid_type) { + case FSID_DEV: + if (!old_valid_dev(exp_sb(exp)->s_dev)) + return 0; + /* FALL THROUGH */ + case FSID_MAJOR_MINOR: + case FSID_ENCODE_DEV: + return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; + case FSID_NUM: + return exp->ex_flags & NFSEXP_FSID; + case FSID_UUID8: + case FSID_UUID16: + if (!is_root_export(exp)) + return 0; + /* fall through */ + case FSID_UUID4_INUM: + case FSID_UUID16_INUM: + return exp->ex_uuid != NULL; + } + return 1; +} - /* Choose filehandle version and fsid type based on - * the reference filehandle (if it is in the same export) - * or the export options. - */ - retry: + +static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh) +{ + u8 version; + u8 fsid_type; +retry: version = 1; if (ref_fh && ref_fh->fh_export == exp) { version = ref_fh->fh_handle.fh_version; fsid_type = ref_fh->fh_handle.fh_fsid_type; - if (ref_fh == fhp) - fh_put(ref_fh); ref_fh = NULL; switch (version) { @@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, goto retry; } - /* Need to check that this type works for this - * export point. As the fsid -> filesystem mapping - * was guided by user-space, there is no guarantee - * that the filesystem actually supports that fsid - * type. If it doesn't we loop around again without - * ref_fh set. + /* + * As the fsid -> filesystem mapping was guided by + * user-space, there is no guarantee that the filesystem + * actually supports that fsid type. If it doesn't we + * loop around again without ref_fh set. */ - switch(fsid_type) { - case FSID_DEV: - if (!old_valid_dev(ex_dev)) - goto retry; - /* FALL THROUGH */ - case FSID_MAJOR_MINOR: - case FSID_ENCODE_DEV: - if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags - & FS_REQUIRES_DEV)) - goto retry; - break; - case FSID_NUM: - if (! (exp->ex_flags & NFSEXP_FSID)) - goto retry; - break; - case FSID_UUID8: - case FSID_UUID16: - if (!root_export) - goto retry; - /* fall through */ - case FSID_UUID4_INUM: - case FSID_UUID16_INUM: - if (exp->ex_uuid == NULL) - goto retry; - break; - } + if (!fsid_type_ok_for_exp(fsid_type, exp)) + goto retry; } else if (exp->ex_flags & NFSEXP_FSID) { fsid_type = FSID_NUM; } else if (exp->ex_uuid) { if (fhp->fh_maxsize >= 64) { - if (root_export) + if (is_root_export(exp)) fsid_type = FSID_UUID16; else fsid_type = FSID_UUID16_INUM; } else { - if (root_export) + if (is_root_export(exp)) fsid_type = FSID_UUID8; else fsid_type = FSID_UUID4_INUM; } - } else if (!old_valid_dev(ex_dev)) + } else if (!old_valid_dev(exp_sb(exp)->s_dev)) /* for newer device numbers, we must use a newer fsid format */ fsid_type = FSID_ENCODE_DEV; else fsid_type = FSID_DEV; + fhp->fh_handle.fh_version = version; + if (version) + fhp->fh_handle.fh_fsid_type = fsid_type; +} + +__be32 +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, + struct svc_fh *ref_fh) +{ + /* ref_fh is a reference file handle. + * if it is non-null and for the same filesystem, then we should compose + * a filehandle which is of the same version, where possible. + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca + * Then create a 32byte filehandle using nfs_fhbase_old + * + */ + + struct inode * inode = dentry->d_inode; + struct dentry *parent = dentry->d_parent; + __u32 *datap; + dev_t ex_dev = exp_sb(exp)->s_dev; + + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + MAJOR(ex_dev), MINOR(ex_dev), + (long) exp->ex_path.dentry->d_inode->i_ino, + parent->d_name.name, dentry->d_name.name, + (inode ? inode->i_ino : 0)); + + /* Choose filehandle version and fsid type based on + * the reference filehandle (if it is in the same export) + * or the export options. + */ + set_version_and_fsid_type(fhp, exp, ref_fh); if (ref_fh == fhp) fh_put(ref_fh); @@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_export = exp; cache_get(&exp->h); - if (version == 0xca) { + if (fhp->fh_handle.fh_version == 0xca) { /* old style filehandle please */ memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; @@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, _fh_update_old(dentry, exp, &fhp->fh_handle); } else { int len; - fhp->fh_handle.fh_version = 1; fhp->fh_handle.fh_auth_type = 0; datap = fhp->fh_handle.fh_auth+0; - fhp->fh_handle.fh_fsid_type = fsid_type; - mk_fsid(fsid_type, datap, ex_dev, + mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fsid_type); + len = key_len(fhp->fh_handle.fh_fsid_type); datap += len/4; fhp->fh_handle.fh_size = 4 + len; if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == 255) { + fh_put(fhp); return nfserr_opnotsupp; + } } return 0; @@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp) case FSID_DEV: case FSID_ENCODE_DEV: case FSID_MAJOR_MINOR: - if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags - & FS_REQUIRES_DEV) + if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV) return FSIDSOURCE_DEV; break; case FSID_NUM: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 24d58adfe5f..67ea83eedd4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -34,6 +34,7 @@ #include <linux/nfsd/syscall.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> +#include <linux/seq_file.h> #define NFSDDBG_FACILITY NFSDDBG_SVC @@ -66,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. + */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -235,13 +246,12 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { - /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ - #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + #define NFSD_DRC_SIZE_SHIFT 10 + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) @@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs) error = nfsd_racache_init(2*nrservs); if (error<0) goto out; - nfs4_state_start(); + error = nfs4_state_start(); + if (error) + goto out; nfsd_reset_versions(); @@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + rqstp->rq_res.head[0].iov_len; rqstp->rq_res.head[0].iov_len += sizeof(__be32); - /* NFSv4.1 DRC requires statp */ - if (rqstp->rq_vers == 4) - nfsd4_set_statp(rqstp, statp); - /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); @@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { - if (nfsd_serv == NULL) + int ret; + mutex_lock(&nfsd_mutex); + if (nfsd_serv == NULL) { + mutex_unlock(&nfsd_mutex); return -ENODEV; - return svc_pool_stats_open(nfsd_serv, file); + } + /* bump up the psudo refcount while traversing */ + svc_get(nfsd_serv); + ret = svc_pool_stats_open(nfsd_serv, file); + mutex_unlock(&nfsd_mutex); + return ret; +} + +int nfsd_pool_stats_release(struct inode *inode, struct file *file) +{ + int ret = seq_release(inode, file); + mutex_lock(&nfsd_mutex); + /* this function really, really should have been called svc_put() */ + svc_destroy(nfsd_serv); + mutex_unlock(&nfsd_mutex); + return ret; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8fa09bfbcba..a293f027326 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -89,6 +89,12 @@ struct raparm_hbucket { #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; +static inline int +nfsd_v4client(struct svc_rqst *rq) +{ + return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. @@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, path_put(&path); goto out; } - if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { + if (nfsd_v4client(rqstp) || + (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ /* * This is subtle: path.dentry is *not* on path.mnt diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c668bca579c..6a2711f4c32 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -46,7 +46,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); } -static struct address_space_operations def_btnode_aops = { +static const struct address_space_operations def_btnode_aops = { .sync_page = block_sync_page, }; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 6bd84a0d823..fc8278c77cd 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -151,7 +151,7 @@ struct file_operations nilfs_file_operations = { .splice_read = generic_file_splice_read, }; -struct inode_operations nilfs_file_inode_operations = { +const struct inode_operations nilfs_file_inode_operations = { .truncate = nilfs_truncate, .setattr = nilfs_setattr, .permission = nilfs_permission, diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 1b3c2bb20da..e6de0a27ab5 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -52,7 +52,7 @@ #include "dat.h" #include "ifile.h" -static struct address_space_operations def_gcinode_aops = { +static const struct address_space_operations def_gcinode_aops = { .sync_page = block_sync_page, }; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 807e584b163..2d2c501deb5 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -238,7 +238,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, return size; } -struct address_space_operations nilfs_aops = { +const struct address_space_operations nilfs_aops = { .writepage = nilfs_writepage, .readpage = nilfs_readpage, .sync_page = block_sync_page, diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 156bf6091a9..b18c4998f8d 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -427,12 +427,12 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) } -static struct address_space_operations def_mdt_aops = { +static const struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, .sync_page = block_sync_page, }; -static struct inode_operations def_mdt_iops; +static const struct inode_operations def_mdt_iops; static struct file_operations def_mdt_fops; /* diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index df70dadb336..ed02e886fa7 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -448,7 +448,7 @@ out: return err; } -struct inode_operations nilfs_dir_inode_operations = { +const struct inode_operations nilfs_dir_inode_operations = { .create = nilfs_create, .lookup = nilfs_lookup, .link = nilfs_link, @@ -462,12 +462,12 @@ struct inode_operations nilfs_dir_inode_operations = { .permission = nilfs_permission, }; -struct inode_operations nilfs_special_inode_operations = { +const struct inode_operations nilfs_special_inode_operations = { .setattr = nilfs_setattr, .permission = nilfs_permission, }; -struct inode_operations nilfs_symlink_inode_operations = { +const struct inode_operations nilfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 724c63766e8..bad7368782d 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -295,12 +295,12 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *); * Inodes and files operations */ extern struct file_operations nilfs_dir_operations; -extern struct inode_operations nilfs_file_inode_operations; +extern const struct inode_operations nilfs_file_inode_operations; extern struct file_operations nilfs_file_operations; -extern struct address_space_operations nilfs_aops; -extern struct inode_operations nilfs_dir_inode_operations; -extern struct inode_operations nilfs_special_inode_operations; -extern struct inode_operations nilfs_symlink_inode_operations; +extern const struct address_space_operations nilfs_aops; +extern const struct inode_operations nilfs_dir_inode_operations; +extern const struct inode_operations nilfs_special_inode_operations; +extern const struct inode_operations nilfs_symlink_inode_operations; /* * filesystem type diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 55f3d6b6073..644e66727dd 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -504,7 +504,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } -static struct super_operations nilfs_sops = { +static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, @@ -560,7 +560,7 @@ nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, nilfs_nfs_get_inode); } -static struct export_operations nilfs_export_ops = { +static const struct export_operations nilfs_export_ops = { .fh_to_dentry = nilfs_fh_to_dentry, .fh_to_parent = nilfs_fh_to_parent, .get_parent = nilfs_get_parent, diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 4350d4993b1..663c0e341f8 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2146,46 +2146,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } /** - * ntfs_file_writev - - * - * Basically the same as generic_file_writev() except that it ends up calling - * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock(). - */ -static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct kiocb kiocb; - ssize_t ret; - - mutex_lock(&inode->i_mutex); - init_sync_kiocb(&kiocb, file); - ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - mutex_unlock(&inode->i_mutex); - if (ret > 0) { - int err = generic_write_sync(file, *ppos - ret, ret); - if (err < 0) - ret = err; - } - return ret; -} - -/** - * ntfs_file_write - simple wrapper for ntfs_file_writev() - */ -static ssize_t ntfs_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; - - return ntfs_file_writev(file, &local_iov, 1, ppos); -} - -/** * ntfs_file_fsync - sync a file to disk * @filp: file to be synced * @dentry: dentry describing the file to sync @@ -2247,7 +2207,7 @@ const struct file_operations ntfs_file_ops = { .read = do_sync_read, /* Read from file. */ .aio_read = generic_file_aio_read, /* Async read from file. */ #ifdef NTFS_RW - .write = ntfs_file_write, /* Write to file. */ + .write = do_sync_write, /* Write to file. */ .aio_write = ntfs_file_aio_write, /* Async write to file. */ /*.release = ,*/ /* Last file is closed. See fs/ext2/file.c:: diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 50931b1ce4b..8b2549f672b 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -829,7 +829,7 @@ enum { /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask - is used to to obtain all flags that are valid for setting. */ + is used to obtain all flags that are valid for setting. */ /* * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index cd0be3f5c3c..a44b14cbcee 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); /* return (void *)__get_free_page(gfp_mask); */ } - if (likely(size >> PAGE_SHIFT < num_physpages)) + if (likely((size >> PAGE_SHIFT) < totalram_pages)) return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index f8424874fa0..cfb2be708ab 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -163,7 +163,7 @@ static void nst_seq_stop(struct seq_file *seq, void *v) { } -static struct seq_operations nst_seq_ops = { +static const struct seq_operations nst_seq_ops = { .start = nst_seq_start, .next = nst_seq_next, .stop = nst_seq_stop, @@ -344,7 +344,7 @@ static void sc_seq_stop(struct seq_file *seq, void *v) { } -static struct seq_operations sc_seq_ops = { +static const struct seq_operations sc_seq_ops = { .start = sc_seq_start, .next = sc_seq_next, .stop = sc_seq_stop, diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index df52f706f66..c5c88124096 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -683,7 +683,7 @@ static int lockres_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations debug_lockres_ops = { +static const struct seq_operations debug_lockres_ops = { .start = lockres_seq_start, .stop = lockres_seq_stop, .next = lockres_seq_next, diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 3fb96fcd4c8..e5df9d170b0 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -109,7 +109,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); int ocfs2_read_quota_block(struct inode *inode, u64 v_block, struct buffer_head **bh); -extern struct dquot_operations ocfs2_quota_operations; +extern const struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; int ocfs2_quota_setup(void); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 44f2a5e1d04..3cf0ec0acdd 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -154,7 +154,7 @@ static int ocfs2_get_quota_block(struct inode *inode, int block, err = -EIO; mlog_errno(err); } - return err;; + return err; } /* Read data from global quotafile - avoid pagecache and such because we cannot @@ -849,7 +849,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) kmem_cache_free(ocfs2_dquot_cachep, dquot); } -struct dquot_operations ocfs2_quota_operations = { +const struct dquot_operations ocfs2_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a3f8871d21f..faca4720aa4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -965,7 +965,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type, int remount) return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); } -static struct quotactl_ops ocfs2_quotactl_ops = { +static const struct quotactl_ops ocfs2_quotactl_ops = { .quota_on = ocfs2_quota_on, .quota_off = ocfs2_quota_off, .quota_sync = vfs_quota_sync, diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c7275cfbdcf..3680bae335b 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -489,7 +489,7 @@ out: return ret; } -struct inode_operations omfs_dir_inops = { +const struct inode_operations omfs_dir_inops = { .lookup = omfs_lookup, .mkdir = omfs_mkdir, .rename = omfs_rename, diff --git a/fs/omfs/file.c b/fs/omfs/file.c index d17e774eaf4..4845fbb18e6 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -333,11 +333,11 @@ struct file_operations omfs_file_operations = { .splice_read = generic_file_splice_read, }; -struct inode_operations omfs_file_inops = { +const struct inode_operations omfs_file_inops = { .truncate = omfs_truncate }; -struct address_space_operations omfs_aops = { +const struct address_space_operations omfs_aops = { .readpage = omfs_readpage, .readpages = omfs_readpages, .writepage = omfs_writepage, diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 379ae5fb441..f3b7c1541f3 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -278,7 +278,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static struct super_operations omfs_sops = { +static const struct super_operations omfs_sops = { .write_inode = omfs_write_inode, .delete_inode = omfs_delete_inode, .put_super = omfs_put_super, diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index 2bc0f067040..df71039945a 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h @@ -45,15 +45,15 @@ extern int omfs_clear_range(struct super_block *sb, u64 block, int count); /* dir.c */ extern struct file_operations omfs_dir_operations; -extern struct inode_operations omfs_dir_inops; +extern const struct inode_operations omfs_dir_inops; extern int omfs_make_empty(struct inode *inode, struct super_block *sb); extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, u64 fsblock); /* file.c */ extern struct file_operations omfs_file_operations; -extern struct inode_operations omfs_file_inops; -extern struct address_space_operations omfs_aops; +extern const struct inode_operations omfs_file_inops; +extern const struct address_space_operations omfs_aops; extern void omfs_make_empty_table(struct buffer_head *bh, int offset); extern int omfs_shrink_inode(struct inode *inode); diff --git a/fs/open.c b/fs/open.c index 31191bf513e..4f01e06227c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -290,10 +290,9 @@ out: return error; } -SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length) +SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { - /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ - return do_sys_truncate(path, (long)length); + return do_sys_truncate(path, length); } static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index fbeaddf595d..7b685e10cba 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -581,7 +581,7 @@ try_scan: } if (from + size > get_capacity(disk)) { - struct block_device_operations *bdops = disk->fops; + const struct block_device_operations *bdops = disk->fops; unsigned long long capacity; printk(KERN_WARNING diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650bbbb..0c6bc602e6c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -82,6 +82,7 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/tracehook.h> +#include <linux/swapops.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +struct stack_stats { + struct vm_area_struct *vma; + unsigned long startpage; + unsigned long usage; +}; + +static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct stack_stats *ss = walk->private; + struct vm_area_struct *vma = ss->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + int ret = 0; + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + +#ifdef CONFIG_STACK_GROWSUP + if (pte_present(ptent) || is_swap_pte(ptent)) + ss->usage = addr - ss->startpage + PAGE_SIZE; +#else + if (pte_present(ptent) || is_swap_pte(ptent)) { + ss->usage = ss->startpage - addr + PAGE_SIZE; + pte++; + ret = 1; + break; + } +#endif + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); + return ret; +} + +static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma, + struct task_struct *task) +{ + struct stack_stats ss; + struct mm_walk stack_walk = { + .pmd_entry = stack_usage_pte_range, + .mm = vma->vm_mm, + .private = &ss, + }; + + if (!vma->vm_mm || is_vm_hugetlb_page(vma)) + return 0; + + ss.vma = vma; + ss.startpage = task->stack_start & PAGE_MASK; + ss.usage = 0; + +#ifdef CONFIG_STACK_GROWSUP + walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end, + &stack_walk); +#else + walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE, + &stack_walk); +#endif + return ss.usage; +} + +static inline void task_show_stack_usage(struct seq_file *m, + struct task_struct *task) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = get_task_mm(task); + + if (mm) { + down_read(&mm->mmap_sem); + vma = find_vma(mm, task->stack_start); + if (vma) + seq_printf(m, "Stack usage:\t%lu kB\n", + get_stack_usage_in_bytes(vma, task) >> 10); + + up_read(&mm->mmap_sem); + mmput(mm); + } +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_show_regs(m, task); #endif task_context_switch_counts(m, task); + task_show_stack_usage(m, task); return 0; } @@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - (permitted && mm) ? mm->start_stack : 0, + (permitted) ? task->stack_start : 0, esp, eip, /* The signal information here is obsolete. diff --git a/fs/proc/base.c b/fs/proc/base.c index 6f742f6658a..837469a9659 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer) do_posix_clock_monotonic_gettime(&uptime); read_lock(&tasklist_lock); - points = badness(task, uptime.tv_sec); + points = badness(task->group_leader, uptime.tv_sec); read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } @@ -458,7 +458,7 @@ struct limit_names { }; static const struct limit_names lnames[RLIM_NLIMITS] = { - [RLIMIT_CPU] = {"Max cpu time", "ms"}, + [RLIMIT_CPU] = {"Max cpu time", "seconds"}, [RLIMIT_FSIZE] = {"Max file size", "bytes"}, [RLIMIT_DATA] = {"Max data size", "bytes"}, [RLIMIT_STACK] = {"Max stack size", "bytes"}, @@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); char buffer[PROC_NUMBUF]; size_t len; - int oom_adjust; + int oom_adjust = OOM_DISABLE; + unsigned long flags; if (!task) return -ESRCH; - oom_adjust = task->oomkilladj; + + if (lock_task_sighand(task, &flags)) { + oom_adjust = task->signal->oom_adj; + unlock_task_sighand(task, &flags); + } + put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); @@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - int oom_adjust; + char buffer[PROC_NUMBUF]; + long oom_adjust; + unsigned long flags; + int err; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - oom_adjust = simple_strtol(buffer, &end, 0); + + err = strict_strtol(strstrip(buffer), 0, &oom_adjust); + if (err) + return -EINVAL; if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && oom_adjust != OOM_DISABLE) return -EINVAL; - if (*end == '\n') - end++; + task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; - if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { + if (!lock_task_sighand(task, &flags)) { + put_task_struct(task); + return -ESRCH; + } + + if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { + unlock_task_sighand(task, &flags); put_task_struct(task); return -EACCES; } - task->oomkilladj = oom_adjust; + + task->signal->oom_adj = oom_adjust; + + unlock_task_sighand(task, &flags); put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + + return count; } static const struct file_operations proc_oom_adjust_operations = { @@ -1169,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - make_it_fail = simple_strtol(buffer, &end, 0); - if (*end == '\n') - end++; + make_it_fail = simple_strtol(strstrip(buffer), &end, 0); + if (*end) + return -EINVAL; task = get_proc_task(file->f_dentry->d_inode); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + + return count; } static const struct file_operations proc_fault_inject_operations = { @@ -2586,9 +2603,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) dput(dentry); } - if (tgid == 0) - goto out; - name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2645,17 +2659,16 @@ out: void proc_flush_task(struct task_struct *task) { int i; - struct pid *pid, *tgid = NULL; + struct pid *pid, *tgid; struct upid *upid; pid = task_pid(task); - if (thread_group_leader(task)) - tgid = task_tgid(task); + tgid = task_tgid(task); for (i = 0; i <= pid->level; i++) { upid = &pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - tgid ? tgid->numbers[i].nr : 0); + tgid->numbers[i].nr); } upid = &pid->numbers[pid->level]; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 59b43a06887..56013371f9f 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -17,9 +17,15 @@ #include <linux/elfcore.h> #include <linux/vmalloc.h> #include <linux/highmem.h> +#include <linux/bootmem.h> #include <linux/init.h> #include <asm/uaccess.h> #include <asm/io.h> +#include <linux/list.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/memory.h> +#include <asm/sections.h> #define CORE_STR "CORE" @@ -29,17 +35,6 @@ static struct proc_dir_entry *proc_root_kcore; -static int open_kcore(struct inode * inode, struct file * filp) -{ - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); - -static const struct file_operations proc_kcore_operations = { - .read = read_kcore, - .open = open_kcore, -}; #ifndef kc_vaddr_to_offset #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) @@ -57,18 +52,19 @@ struct memelfnote void *data; }; -static struct kcore_list *kclist; +static LIST_HEAD(kclist_head); static DEFINE_RWLOCK(kclist_lock); +static int kcore_need_update = 1; void -kclist_add(struct kcore_list *new, void *addr, size_t size) +kclist_add(struct kcore_list *new, void *addr, size_t size, int type) { new->addr = (unsigned long)addr; new->size = size; + new->type = type; write_lock(&kclist_lock); - new->next = kclist; - kclist = new; + list_add_tail(&new->list, &kclist_head); write_unlock(&kclist_lock); } @@ -80,7 +76,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) *nphdr = 1; /* PT_NOTE */ size = 0; - for (m=kclist; m; m=m->next) { + list_for_each_entry(m, &kclist_head, list) { try = kc_vaddr_to_offset((size_t)m->addr + m->size); if (try > size) size = try; @@ -97,6 +93,177 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) return size + *elf_buflen; } +static void free_kclist_ents(struct list_head *head) +{ + struct kcore_list *tmp, *pos; + + list_for_each_entry_safe(pos, tmp, head, list) { + list_del(&pos->list); + kfree(pos); + } +} +/* + * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list. + */ +static void __kcore_update_ram(struct list_head *list) +{ + int nphdr; + size_t size; + struct kcore_list *tmp, *pos; + LIST_HEAD(garbage); + + write_lock(&kclist_lock); + if (kcore_need_update) { + list_for_each_entry_safe(pos, tmp, &kclist_head, list) { + if (pos->type == KCORE_RAM + || pos->type == KCORE_VMEMMAP) + list_move(&pos->list, &garbage); + } + list_splice_tail(list, &kclist_head); + } else + list_splice(list, &garbage); + kcore_need_update = 0; + proc_root_kcore->size = get_kcore_size(&nphdr, &size); + write_unlock(&kclist_lock); + + free_kclist_ents(&garbage); +} + + +#ifdef CONFIG_HIGHMEM +/* + * If no highmem, we can assume [0...max_low_pfn) continuous range of memory + * because memory hole is not as big as !HIGHMEM case. + * (HIGHMEM is special because part of memory is _invisible_ from the kernel.) + */ +static int kcore_update_ram(void) +{ + LIST_HEAD(head); + struct kcore_list *ent; + int ret = 0; + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + ent->addr = (unsigned long)__va(0); + ent->size = max_low_pfn << PAGE_SHIFT; + ent->type = KCORE_RAM; + list_add(&ent->list, &head); + __kcore_update_ram(&head); + return ret; +} + +#else /* !CONFIG_HIGHMEM */ + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* calculate vmemmap's address from given system ram pfn and register it */ +int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) +{ + unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; + unsigned long nr_pages = ent->size >> PAGE_SHIFT; + unsigned long start, end; + struct kcore_list *vmm, *tmp; + + + start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; + end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; + end = ALIGN(end, PAGE_SIZE); + /* overlap check (because we have to align page */ + list_for_each_entry(tmp, head, list) { + if (tmp->type != KCORE_VMEMMAP) + continue; + if (start < tmp->addr + tmp->size) + if (end > tmp->addr) + end = tmp->addr; + } + if (start < end) { + vmm = kmalloc(sizeof(*vmm), GFP_KERNEL); + if (!vmm) + return 0; + vmm->addr = start; + vmm->size = end - start; + vmm->type = KCORE_VMEMMAP; + list_add_tail(&vmm->list, head); + } + return 1; + +} +#else +int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) +{ + return 1; +} + +#endif + +static int +kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) +{ + struct list_head *head = (struct list_head *)arg; + struct kcore_list *ent; + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->size = nr_pages << PAGE_SHIFT; + + /* Sanity check: Can happen in 32bit arch...maybe */ + if (ent->addr < (unsigned long) __va(0)) + goto free_out; + + /* cut not-mapped area. ....from ppc-32 code. */ + if (ULONG_MAX - ent->addr < ent->size) + ent->size = ULONG_MAX - ent->addr; + + /* cut when vmalloc() area is higher than direct-map area */ + if (VMALLOC_START > (unsigned long)__va(0)) { + if (ent->addr > VMALLOC_START) + goto free_out; + if (VMALLOC_START - ent->addr < ent->size) + ent->size = VMALLOC_START - ent->addr; + } + + ent->type = KCORE_RAM; + list_add_tail(&ent->list, head); + + if (!get_sparsemem_vmemmap_info(ent, head)) { + list_del(&ent->list); + goto free_out; + } + + return 0; +free_out: + kfree(ent); + return 1; +} + +static int kcore_update_ram(void) +{ + int nid, ret; + unsigned long end_pfn; + LIST_HEAD(head); + + /* Not inialized....update now */ + /* find out "max pfn" */ + end_pfn = 0; + for_each_node_state(nid, N_HIGH_MEMORY) { + unsigned long node_end; + node_end = NODE_DATA(nid)->node_start_pfn + + NODE_DATA(nid)->node_spanned_pages; + if (end_pfn < node_end) + end_pfn = node_end; + } + /* scan 0 to max_pfn */ + ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private); + if (ret) { + free_kclist_ents(&head); + return -ENOMEM; + } + __kcore_update_ram(&head); + return ret; +} +#endif /* CONFIG_HIGHMEM */ /*****************************************************************************/ /* @@ -192,7 +359,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) nhdr->p_align = 0; /* setup ELF PT_LOAD program header for every area */ - for (m=kclist; m; m=m->next) { + list_for_each_entry(m, &kclist_head, list) { phdr = (struct elf_phdr *) bufp; bufp += sizeof(struct elf_phdr); offset += sizeof(struct elf_phdr); @@ -265,7 +432,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) unsigned long start; read_lock(&kclist_lock); - proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen); + size = get_kcore_size(&nphdr, &elf_buflen); + if (buflen == 0 || *fpos >= size) { read_unlock(&kclist_lock); return 0; @@ -317,7 +485,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) struct kcore_list *m; read_lock(&kclist_lock); - for (m=kclist; m; m=m->next) { + list_for_each_entry(m, &kclist_head, list) { if (start >= m->addr && start < (m->addr+m->size)) break; } @@ -326,45 +494,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (m == NULL) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if (is_vmalloc_addr((void *)start)) { + } else if (is_vmalloc_or_module_addr((void *)start)) { char * elf_buf; - struct vm_struct *m; - unsigned long curstart = start; - unsigned long cursize = tsz; elf_buf = kzalloc(tsz, GFP_KERNEL); if (!elf_buf) return -ENOMEM; - - read_lock(&vmlist_lock); - for (m=vmlist; m && cursize; m=m->next) { - unsigned long vmstart; - unsigned long vmsize; - unsigned long msize = m->size - PAGE_SIZE; - - if (((unsigned long)m->addr + msize) < - curstart) - continue; - if ((unsigned long)m->addr > (curstart + - cursize)) - break; - vmstart = (curstart < (unsigned long)m->addr ? - (unsigned long)m->addr : curstart); - if (((unsigned long)m->addr + msize) > - (curstart + cursize)) - vmsize = curstart + cursize - vmstart; - else - vmsize = (unsigned long)m->addr + - msize - vmstart; - curstart = vmstart + vmsize; - cursize -= vmsize; - /* don't dump ioremap'd stuff! (TA) */ - if (m->flags & VM_IOREMAP) - continue; - memcpy(elf_buf + (vmstart - start), - (char *)vmstart, vmsize); - } - read_unlock(&vmlist_lock); + vread(elf_buf, (char *)start, tsz); + /* we have to zero-fill user buffer even if no read */ if (copy_to_user(buffer, elf_buf, tsz)) { kfree(elf_buf); return -EFAULT; @@ -402,12 +539,96 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return acc; } + +static int open_kcore(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + if (kcore_need_update) + kcore_update_ram(); + if (i_size_read(inode) != proc_root_kcore->size) { + mutex_lock(&inode->i_mutex); + i_size_write(inode, proc_root_kcore->size); + mutex_unlock(&inode->i_mutex); + } + return 0; +} + + +static const struct file_operations proc_kcore_operations = { + .read = read_kcore, + .open = open_kcore, +}; + +#ifdef CONFIG_MEMORY_HOTPLUG +/* just remember that we have to update kcore */ +static int __meminit kcore_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_ONLINE: + case MEM_OFFLINE: + write_lock(&kclist_lock); + kcore_need_update = 1; + write_unlock(&kclist_lock); + } + return NOTIFY_OK; +} +#endif + + +static struct kcore_list kcore_vmalloc; + +#ifdef CONFIG_ARCH_PROC_KCORE_TEXT +static struct kcore_list kcore_text; +/* + * If defined, special segment is used for mapping kernel text instead of + * direct-map area. We need to create special TEXT section. + */ +static void __init proc_kcore_text_init(void) +{ + kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT); +} +#else +static void __init proc_kcore_text_init(void) +{ +} +#endif + +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) +/* + * MODULES_VADDR has no intersection with VMALLOC_ADDR. + */ +struct kcore_list kcore_modules; +static void __init add_modules_range(void) +{ + kclist_add(&kcore_modules, (void *)MODULES_VADDR, + MODULES_END - MODULES_VADDR, KCORE_VMALLOC); +} +#else +static void __init add_modules_range(void) +{ +} +#endif + static int __init proc_kcore_init(void) { - proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); - if (proc_root_kcore) - proc_root_kcore->size = - (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, + &proc_kcore_operations); + if (!proc_root_kcore) { + printk(KERN_ERR "couldn't create /proc/kcore\n"); + return 0; /* Always returns 0. */ + } + /* Store text area if it's special */ + proc_kcore_text_init(); + /* Store vmalloc area */ + kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, + VMALLOC_END - VMALLOC_START, KCORE_VMALLOC); + add_modules_range(); + /* Store direct-map area from physical memory map */ + kcore_update_ram(); + hotplug_memory_notifier(kcore_callback, 0); + return 0; } module_init(proc_kcore_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index d5c410d47fa..171e052c07b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Writeback: %8lu kB\n" "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" + "Shmem: %8lu kB\n" "Slab: %8lu kB\n" "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" + "KernelStack: %8lu kB\n" "PageTables: %8lu kB\n" #ifdef CONFIG_QUICKLIST "Quicklists: %8lu kB\n" @@ -124,10 +126,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, K(global_page_state(NR_PAGETABLE)), #ifdef CONFIG_QUICKLIST K(quicklist_total_size()), diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 7e14d1a0400..9fe7d7ebe11 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos) return rb_next((struct rb_node *) v); } -static struct seq_operations proc_nommu_region_list_seqop = { +static const struct seq_operations proc_nommu_region_list_seqop = { .start = nommu_region_list_start, .next = nommu_region_list_next, .stop = nommu_region_list_stop, diff --git a/fs/proc/page.c b/fs/proc/page.c index 2707c6c7a20..2281c2cbfe2 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -2,6 +2,7 @@ #include <linux/compiler.h> #include <linux/fs.h> #include <linux/init.h> +#include <linux/ksm.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/proc_fs.h> @@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = { #define KPF_UNEVICTABLE 18 #define KPF_NOPAGE 20 +#define KPF_KSM 21 + /* kernel hacking assistances * WARNING: subject to change, never rely on them! */ @@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; + if (PageKsm(page)) + u |= 1 << KPF_KSM; /* * compound pages: export both head/tail info diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9bd8be1d235..2a1bef9203c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { name = "[stack]"; + } else { + unsigned long stack_start; + struct proc_maps_private *pmp; + + pmp = m->private; + stack_start = pmp->task->stack_start; + + if (vma->vm_start <= stack_start && + vma->vm_end >= stack_start) { + pad_len_spaces(m, len); + seq_printf(m, + "[threadstack:%08lx]", +#ifdef CONFIG_STACK_GROWSUP + vma->vm_end - stack_start +#else + stack_start - vma->vm_start +#endif + ); + } } } else { name = "[vdso]"; @@ -465,23 +484,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } +#define CLEAR_REFS_ALL 1 +#define CLEAR_REFS_ANON 2 +#define CLEAR_REFS_MAPPED 3 + static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF], *end; + char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; + long type; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) + if (strict_strtol(strstrip(buffer), 10, &type)) + return -EINVAL; + if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; - if (*end == '\n') - end++; task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; @@ -494,18 +518,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; - if (!is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); + if (is_vm_hugetlb_page(vma)) + continue; + /* + * Writing 1 to /proc/pid/clear_refs affects all pages. + * + * Writing 2 to /proc/pid/clear_refs only affects + * Anonymous pages. + * + * Writing 3 to /proc/pid/clear_refs only affects file + * mapped pages. + */ + if (type == CLEAR_REFS_ANON && vma->vm_file) + continue; + if (type == CLEAR_REFS_MAPPED && !vma->vm_file) + continue; + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); } flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); } put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + + return count; } const struct file_operations proc_clear_refs_operations = { diff --git a/fs/qnx4/Kconfig b/fs/qnx4/Kconfig index be8e0e1445b..5f608999404 100644 --- a/fs/qnx4/Kconfig +++ b/fs/qnx4/Kconfig @@ -6,20 +6,9 @@ config QNX4FS_FS QNX 4 and QNX 6 (the latter is also called QNX RTP). Further information is available at <http://www.qnx.com/>. Say Y if you intend to mount QNX hard disks or floppies. - Unless you say Y to "QNX4FS read-write support" below, you will - only be able to read these file systems. To compile this file system support as a module, choose M here: the module will be called qnx4. If you don't know whether you need it, then you don't need it: answer N. - -config QNX4FS_RW - bool "QNX4FS write support (DANGEROUS)" - depends on QNX4FS_FS && EXPERIMENTAL && BROKEN - help - Say Y if you want to test write support for QNX4 file systems. - - It's currently broken, so for now: - answer N. diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index e4d408cc547..4a283b3f87f 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4.o -qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o +qnx4-objs := inode.o dir.o namei.o bitmap.o diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index e1cd061a25f..0afba069d56 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -78,84 +78,3 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb) return total_free; } - -#ifdef CONFIG_QNX4FS_RW - -int qnx4_is_free(struct super_block *sb, long block) -{ - int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; - int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size); - struct buffer_head *bh; - const char *g; - int ret = -EIO; - - start += block / (QNX4_BLOCK_SIZE * 8); - QNX4DEBUG(("qnx4: is_free requesting block [%lu], bitmap in block [%lu]\n", - (unsigned long) block, (unsigned long) start)); - (void) size; /* CHECKME */ - bh = sb_bread(sb, start); - if (bh == NULL) { - return -EIO; - } - g = bh->b_data + (block % QNX4_BLOCK_SIZE); - if (((*g) & (1 << (block % 8))) == 0) { - QNX4DEBUG(("qnx4: is_free -> block is free\n")); - ret = 1; - } else { - QNX4DEBUG(("qnx4: is_free -> block is busy\n")); - ret = 0; - } - brelse(bh); - - return ret; -} - -int qnx4_set_bitmap(struct super_block *sb, long block, int busy) -{ - int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; - int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size); - struct buffer_head *bh; - char *g; - - start += block / (QNX4_BLOCK_SIZE * 8); - QNX4DEBUG(("qnx4: set_bitmap requesting block [%lu], bitmap in block [%lu]\n", - (unsigned long) block, (unsigned long) start)); - (void) size; /* CHECKME */ - bh = sb_bread(sb, start); - if (bh == NULL) { - return -EIO; - } - g = bh->b_data + (block % QNX4_BLOCK_SIZE); - if (busy == 0) { - (*g) &= ~(1 << (block % 8)); - } else { - (*g) |= (1 << (block % 8)); - } - mark_buffer_dirty(bh); - brelse(bh); - - return 0; -} - -static void qnx4_clear_inode(struct inode *inode) -{ - struct qnx4_inode_entry *qnx4_ino = qnx4_raw_inode(inode); - /* What for? */ - memset(qnx4_ino->di_fname, 0, sizeof qnx4_ino->di_fname); - qnx4_ino->di_size = 0; - qnx4_ino->di_num_xtnts = 0; - qnx4_ino->di_mode = 0; - qnx4_ino->di_status = 0; -} - -void qnx4_free_inode(struct inode *inode) -{ - if (inode->i_ino < 1) { - printk("free_inode: inode 0 or nonexistent inode\n"); - return; - } - qnx4_clear_inode(inode); - clear_inode(inode); -} - -#endif diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 003c68f3238..86cc39cb139 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -85,9 +85,4 @@ const struct file_operations qnx4_dir_operations = const struct inode_operations qnx4_dir_inode_operations = { .lookup = qnx4_lookup, -#ifdef CONFIG_QNX4FS_RW - .create = qnx4_create, - .unlink = qnx4_unlink, - .rmdir = qnx4_rmdir, -#endif }; diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c deleted file mode 100644 index 09b170ac936..00000000000 --- a/fs/qnx4/file.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * QNX4 file system, Linux implementation. - * - * Version : 0.2.1 - * - * Using parts of the xiafs filesystem. - * - * History : - * - * 25-05-1998 by Richard Frowijn : first release. - * 21-06-1998 by Frank Denis : wrote qnx4_readpage to use generic_file_read. - * 27-06-1998 by Frank Denis : file overwriting. - */ - -#include "qnx4.h" - -/* - * We have mostly NULL's here: the current defaults are ok for - * the qnx4 filesystem. - */ -const struct file_operations qnx4_file_operations = -{ - .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .mmap = generic_file_mmap, - .splice_read = generic_file_splice_read, -#ifdef CONFIG_QNX4FS_RW - .write = do_sync_write, - .aio_write = generic_file_aio_write, - .fsync = simple_fsync, -#endif -}; - -const struct inode_operations qnx4_file_inode_operations = -{ -#ifdef CONFIG_QNX4FS_RW - .truncate = qnx4_truncate, -#endif -}; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 681df5fcd16..d2cd1798d8c 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -28,73 +28,6 @@ static const struct super_operations qnx4_sops; -#ifdef CONFIG_QNX4FS_RW - -static void qnx4_delete_inode(struct inode *inode) -{ - QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); - truncate_inode_pages(&inode->i_data, 0); - inode->i_size = 0; - qnx4_truncate(inode); - lock_kernel(); - qnx4_free_inode(inode); - unlock_kernel(); -} - -static int qnx4_write_inode(struct inode *inode, int do_sync) -{ - struct qnx4_inode_entry *raw_inode; - int block, ino; - struct buffer_head *bh; - ino = inode->i_ino; - - QNX4DEBUG(("qnx4: write inode 1.\n")); - if (inode->i_nlink == 0) { - return 0; - } - if (!ino) { - printk("qnx4: bad inode number on dev %s: %d is out of range\n", - inode->i_sb->s_id, ino); - return -EIO; - } - QNX4DEBUG(("qnx4: write inode 2.\n")); - block = ino / QNX4_INODES_PER_BLOCK; - lock_kernel(); - if (!(bh = sb_bread(inode->i_sb, block))) { - printk("qnx4: major problem: unable to read inode from dev " - "%s\n", inode->i_sb->s_id); - unlock_kernel(); - return -EIO; - } - raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + - (ino % QNX4_INODES_PER_BLOCK); - raw_inode->di_mode = cpu_to_le16(inode->i_mode); - raw_inode->di_uid = cpu_to_le16(fs_high2lowuid(inode->i_uid)); - raw_inode->di_gid = cpu_to_le16(fs_high2lowgid(inode->i_gid)); - raw_inode->di_nlink = cpu_to_le16(inode->i_nlink); - raw_inode->di_size = cpu_to_le32(inode->i_size); - raw_inode->di_mtime = cpu_to_le32(inode->i_mtime.tv_sec); - raw_inode->di_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); - mark_buffer_dirty(bh); - if (do_sync) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk("qnx4: IO error syncing inode [%s:%08x]\n", - inode->i_sb->s_id, ino); - brelse(bh); - unlock_kernel(); - return -EIO; - } - } - brelse(bh); - unlock_kernel(); - return 0; -} - -#endif - static void qnx4_put_super(struct super_block *sb); static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_destroy_inode(struct inode *inode); @@ -108,10 +41,6 @@ static const struct super_operations qnx4_sops = .put_super = qnx4_put_super, .statfs = qnx4_statfs, .remount_fs = qnx4_remount, -#ifdef CONFIG_QNX4FS_RW - .write_inode = qnx4_write_inode, - .delete_inode = qnx4_delete_inode, -#endif }; static int qnx4_remount(struct super_block *sb, int *flags, char *data) @@ -120,15 +49,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; -#ifndef CONFIG_QNX4FS_RW *flags |= MS_RDONLY; -#endif - if (*flags & MS_RDONLY) { - return 0; - } - - mark_buffer_dirty(qs->sb_buf); - return 0; } @@ -354,9 +275,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) } s->s_op = &qnx4_sops; s->s_magic = QNX4_SUPER_MAGIC; -#ifndef CONFIG_QNX4FS_RW s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ -#endif qnx4_sb(s)->sb_buf = bh; qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data; @@ -489,8 +408,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); if (S_ISREG(inode->i_mode)) { - inode->i_op = &qnx4_file_inode_operations; - inode->i_fop = &qnx4_file_operations; + inode->i_fop = &generic_ro_fops; inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; } else if (S_ISDIR(inode->i_mode)) { diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 5972ed21493..ae1e7edbacd 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -134,108 +134,3 @@ out: return NULL; } - -#ifdef CONFIG_QNX4FS_RW -int qnx4_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - QNX4DEBUG(("qnx4: qnx4_create\n")); - if (dir == NULL) { - return -ENOENT; - } - return -ENOSPC; -} - -int qnx4_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct inode *inode; - int retval; - int ino; - - QNX4DEBUG(("qnx4: qnx4_rmdir [%s]\n", dentry->d_name.name)); - lock_kernel(); - bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name, - &de, &ino); - if (bh == NULL) { - unlock_kernel(); - return -ENOENT; - } - inode = dentry->d_inode; - if (inode->i_ino != ino) { - retval = -EIO; - goto end_rmdir; - } -#if 0 - if (!empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } -#endif - if (inode->i_nlink != 2) { - QNX4DEBUG(("empty directory has nlink!=2 (%d)\n", inode->i_nlink)); - } - QNX4DEBUG(("qnx4: deleting directory\n")); - de->di_status = 0; - memset(de->di_fname, 0, sizeof de->di_fname); - de->di_mode = 0; - mark_buffer_dirty_inode(bh, dir); - clear_nlink(inode); - mark_inode_dirty(inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - inode_dec_link_count(dir); - retval = 0; - - end_rmdir: - brelse(bh); - - unlock_kernel(); - return retval; -} - -int qnx4_unlink(struct inode *dir, struct dentry *dentry) -{ - struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct inode *inode; - int retval; - int ino; - - QNX4DEBUG(("qnx4: qnx4_unlink [%s]\n", dentry->d_name.name)); - lock_kernel(); - bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name, - &de, &ino); - if (bh == NULL) { - unlock_kernel(); - return -ENOENT; - } - inode = dentry->d_inode; - if (inode->i_ino != ino) { - retval = -EIO; - goto end_unlink; - } - retval = -EPERM; - if (!inode->i_nlink) { - QNX4DEBUG(("Deleting nonexistent file (%s:%lu), %d\n", - inode->i_sb->s_id, - inode->i_ino, inode->i_nlink)); - inode->i_nlink = 1; - } - de->di_status = 0; - memset(de->di_fname, 0, sizeof de->di_fname); - de->di_mode = 0; - mark_buffer_dirty_inode(bh, dir); - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - mark_inode_dirty(dir); - inode->i_ctime = dir->i_ctime; - inode_dec_link_count(inode); - retval = 0; - -end_unlink: - unlock_kernel(); - brelse(bh); - - return retval; -} -#endif diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h index 9efc089454f..33a60858203 100644 --- a/fs/qnx4/qnx4.h +++ b/fs/qnx4/qnx4.h @@ -29,17 +29,9 @@ extern unsigned long qnx4_block_map(struct inode *inode, long iblock); extern struct buffer_head *qnx4_bread(struct inode *, int, int); -extern const struct inode_operations qnx4_file_inode_operations; extern const struct inode_operations qnx4_dir_inode_operations; -extern const struct file_operations qnx4_file_operations; extern const struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); -extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); -extern void qnx4_truncate(struct inode *inode); -extern void qnx4_free_inode(struct inode *inode); -extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); -extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) { diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c deleted file mode 100644 index d94d9ee241f..00000000000 --- a/fs/qnx4/truncate.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * QNX4 file system, Linux implementation. - * - * Version : 0.1 - * - * Using parts of the xiafs filesystem. - * - * History : - * - * 30-06-1998 by Frank DENIS : ugly filler. - */ - -#include <linux/smp_lock.h> -#include "qnx4.h" - -#ifdef CONFIG_QNX4FS_RW - -void qnx4_truncate(struct inode *inode) -{ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) { - return; - } - lock_kernel(); - if (!(S_ISDIR(inode->i_mode))) { - /* TODO */ - } - QNX4DEBUG(("qnx4: qnx4_truncate called\n")); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - unlock_kernel(); -} - -#endif diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 38f7bd559f3..39b49c42a7e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1839,7 +1839,7 @@ EXPORT_SYMBOL(dquot_commit_info); /* * Definitions of diskquota operations. */ -struct dquot_operations dquot_operations = { +const struct dquot_operations dquot_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, @@ -2461,7 +2461,7 @@ out: } EXPORT_SYMBOL(vfs_set_dqinfo); -struct quotactl_ops vfs_quotactl_ops = { +const struct quotactl_ops vfs_quotactl_ops = { .quota_on = vfs_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a7f0110fca4..a6090aa1a7c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -34,12 +34,10 @@ #include <linux/ramfs.h> #include <linux/sched.h> #include <linux/parser.h> +#include <linux/magic.h> #include <asm/uaccess.h> #include "internal.h" -/* some random number */ -#define RAMFS_MAGIC 0x858458f6 - #define RAMFS_DEFAULT_MODE 0755 static const struct super_operations ramfs_ops; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7adea74d6a8..f0ad05f3802 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -612,7 +612,7 @@ static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, char *, int); -static struct dquot_operations reiserfs_quota_operations = { +static const struct dquot_operations reiserfs_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, @@ -629,7 +629,7 @@ static struct dquot_operations reiserfs_quota_operations = { .destroy_dquot = dquot_destroy, }; -static struct quotactl_ops reiserfs_qctl_operations = { +static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 4ab3c03d8f9..47f132df0c3 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -284,7 +284,7 @@ static const struct file_operations romfs_dir_operations = { .readdir = romfs_readdir, }; -static struct inode_operations romfs_dir_inode_operations = { +static const struct inode_operations romfs_dir_inode_operations = { .lookup = romfs_lookup, }; diff --git a/fs/select.c b/fs/select.c index 8084834e123..a201fc37022 100644 --- a/fs/select.c +++ b/fs/select.c @@ -41,22 +41,28 @@ * better solutions.. */ +#define MAX_SLACK (100 * NSEC_PER_MSEC) + static long __estimate_accuracy(struct timespec *tv) { long slack; int divfactor = 1000; + if (tv->tv_sec < 0) + return 0; + if (task_nice(current) > 0) divfactor = divfactor / 5; + if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) + return MAX_SLACK; + slack = tv->tv_nsec / divfactor; slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); - if (slack > 100 * NSEC_PER_MSEC) - slack = 100 * NSEC_PER_MSEC; + if (slack > MAX_SLACK) + return MAX_SLACK; - if (slack < 0) - slack = 0; return slack; } diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 9468168b9af..71c29b6670b 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -509,7 +509,7 @@ date_unix2dos(struct smb_sb_info *server, month = 2; } else { nl_day = (year & 3) || day <= 59 ? day : day - 1; - for (month = 0; month < 12; month++) + for (month = 1; month < 12; month++) if (day_n[month] > nl_day) break; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cb5fc57e370..6c197ef53ad 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -44,7 +44,7 @@ #include "squashfs.h" static struct file_system_type squashfs_fs_type; -static struct super_operations squashfs_super_ops; +static const struct super_operations squashfs_super_ops; static int supported_squashfs_filesystem(short major, short minor, short comp) { @@ -444,7 +444,7 @@ static struct file_system_type squashfs_fs_type = { .fs_flags = FS_REQUIRES_DEV }; -static struct super_operations squashfs_super_ops = { +static const struct super_operations squashfs_super_ops = { .alloc_inode = squashfs_alloc_inode, .destroy_inode = squashfs_destroy_inode, .statfs = squashfs_statfs, diff --git a/fs/super.c b/fs/super.c index b03fea8fbfb..0e7207b9815 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,7 @@ DEFINE_SPINLOCK(sb_lock); static struct super_block *alloc_super(struct file_system_type *type) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); - static struct super_operations default_op; + static const struct super_operations default_op; if (s) { if (security_sb_alloc(s)) { diff --git a/fs/sync.c b/fs/sync.c index c08467a5d7c..d104591b066 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -183,6 +183,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) ret = err; return ret; } +EXPORT_SYMBOL(file_fsync); /** * vfs_fsync_range - helper to sync a range of data & metadata to disk diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index ee1ce68fd98..076ca50e993 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -715,7 +715,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) * ubifs_get_free_space - return amount of free space. * @c: UBIFS file-system description object * - * This function calculates and retuns amount of free space to report to + * This function calculates and returns amount of free space to report to * user-space. */ long long ubifs_get_free_space(struct ubifs_info *c) diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index f3a7945527f..4775af40116 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -510,7 +510,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; int first = 1, iip; struct ubifs_debug_info *d = c->dbg; - union ubifs_key lower_key, upper_key, l_key, u_key; + union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; unsigned long long uninitialized_var(last_sqnum); struct ubifs_idx_node *idx; struct list_head list; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ce2cd834361..dbc093afd94 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -210,6 +210,20 @@ const char *dbg_cstate(int cmt_state) } } +const char *dbg_jhead(int jhead) +{ + switch (jhead) { + case GCHD: + return "0 (GC)"; + case BASEHD: + return "1 (base)"; + case DATAHD: + return "2 (data)"; + default: + return "unknown journal head"; + } +} + static void dump_ch(const struct ubifs_ch *ch) { printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); @@ -623,8 +637,9 @@ void dbg_dump_budg(struct ubifs_info *c) /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", + dbg_jhead(c->jheads[i].wbuf.jhead), + c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); @@ -648,9 +663,90 @@ void dbg_dump_budg(struct ubifs_info *c) void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) { - printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " - "flags %#x\n", lp->lnum, lp->free, lp->dirty, - c->leb_size - lp->free - lp->dirty, lp->flags); + int i, spc, dark = 0, dead = 0; + struct rb_node *rb; + struct ubifs_bud *bud; + + spc = lp->free + lp->dirty; + if (spc < c->dead_wm) + dead = spc; + else + dark = ubifs_calc_dark(c, spc); + + if (lp->flags & LPROPS_INDEX) + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d flags %#x (", lp->lnum, lp->free, + lp->dirty, c->leb_size - spc, spc, lp->flags); + else + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " + "flags %#-4x (", lp->lnum, lp->free, lp->dirty, + c->leb_size - spc, spc, dark, dead, + (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); + + if (lp->flags & LPROPS_TAKEN) { + if (lp->flags & LPROPS_INDEX) + printk(KERN_CONT "index, taken"); + else + printk(KERN_CONT "taken"); + } else { + const char *s; + + if (lp->flags & LPROPS_INDEX) { + switch (lp->flags & LPROPS_CAT_MASK) { + case LPROPS_DIRTY_IDX: + s = "dirty index"; + break; + case LPROPS_FRDI_IDX: + s = "freeable index"; + break; + default: + s = "index"; + } + } else { + switch (lp->flags & LPROPS_CAT_MASK) { + case LPROPS_UNCAT: + s = "not categorized"; + break; + case LPROPS_DIRTY: + s = "dirty"; + break; + case LPROPS_FREE: + s = "free"; + break; + case LPROPS_EMPTY: + s = "empty"; + break; + case LPROPS_FREEABLE: + s = "freeable"; + break; + default: + s = NULL; + break; + } + } + printk(KERN_CONT "%s", s); + } + + for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + if (bud->lnum == lp->lnum) { + int head = 0; + for (i = 0; i < c->jhead_cnt; i++) { + if (lp->lnum == c->jheads[i].wbuf.lnum) { + printk(KERN_CONT ", jhead %s", + dbg_jhead(i)); + head = 1; + } + } + if (!head) + printk(KERN_CONT ", bud of jhead %s", + dbg_jhead(bud->jhead)); + } + } + if (lp->lnum == c->gc_lnum) + printk(KERN_CONT ", GC LEB"); + printk(KERN_CONT ")\n"); } void dbg_dump_lprops(struct ubifs_info *c) @@ -724,7 +820,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", current->pid, lnum); - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { ubifs_err("scan error %d", (int)PTR_ERR(sleb)); return; @@ -909,8 +1005,10 @@ out: ubifs_msg("saved lprops statistics dump"); dbg_dump_lstats(&d->saved_lst); ubifs_get_lp_stats(c, &lst); + ubifs_msg("current lprops statistics dump"); - dbg_dump_lstats(&d->saved_lst); + dbg_dump_lstats(&lst); + spin_lock(&c->space_lock); dbg_dump_budg(c); spin_unlock(&c->space_lock); diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index c1cd73b2e06..29d960101ea 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -271,6 +271,7 @@ void ubifs_debugging_exit(struct ubifs_info *c); /* Dump functions */ const char *dbg_ntype(int type); const char *dbg_cstate(int cmt_state); +const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); @@ -321,6 +322,8 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size); /* Force the use of in-the-gaps method for testing */ @@ -425,6 +428,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_ntype(type) "" #define dbg_cstate(cmt_state) "" +#define dbg_jhead(jhead) "" #define dbg_get_key_dump(c, key) ({}) #define dbg_dump_inode(c, inode) ({}) #define dbg_dump_node(c, node) ({}) @@ -460,6 +464,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_check_heap(c, heap, cat, add_pos) ({}) #define dbg_check_lprops(c) 0 #define dbg_check_lpt_nodes(c, cnode, row, col) 0 +#define dbg_check_inode_size(c, inode, size) 0 #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 #define dbg_failure_mode 0 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 6d34dc7e33e..2e6481a7701 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -21,34 +21,32 @@ */ /* - * This file implements VFS file and inode operations of regular files, device + * This file implements VFS file and inode operations for regular files, device * nodes and symlinks as well as address space operations. * - * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the - * page is dirty and is used for budgeting purposes - dirty pages should not be - * budgeted. The PG_checked flag is set if full budgeting is required for the - * page e.g., when it corresponds to a file hole or it is just beyond the file - * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to - * fail in this function, and the budget is released in 'ubifs_write_end()'. So - * the PG_private and PG_checked flags carry the information about how the page - * was budgeted, to make it possible to release the budget properly. + * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if + * the page is dirty and is used for optimization purposes - dirty pages are + * not budgeted so the flag shows that 'ubifs_write_end()' should not release + * the budget for this page. The @PG_checked flag is set if full budgeting is + * required for the page e.g., when it corresponds to a file hole or it is + * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because + * it is OK to fail in this function, and the budget is released in + * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry + * information about how the page was budgeted, to make it possible to release + * the budget properly. * - * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations - * we implement. However, this is not true for '->writepage()', which might be - * called with 'i_mutex' unlocked. For example, when pdflush is performing - * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the - * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is - * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim - * path'. So, in '->writepage()' we are only guaranteed that the page is - * locked. + * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we + * implement. However, this is not true for 'ubifs_writepage()', which may be + * called with @i_mutex unlocked. For example, when pdflush is doing background + * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" + * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the + * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' + * we are only guaranteed that the page is locked. * - * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., - * readahead path does not have it locked ("sys_read -> generic_file_aio_read - * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is - * not set as well. However, UBIFS disables readahead. - * - * This, for example means that there might be 2 concurrent '->writepage()' - * calls for the same inode, but different inode dirty pages. + * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the + * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> + * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not + * set as well. However, UBIFS disables readahead. */ #include "ubifs.h" @@ -449,9 +447,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further - * code the page is new. This might be not true, but it - * is better to budget more that to read the page from - * the media. + * code know that the page is new. This might be not + * true, but it is better to budget more than to read + * the page from the media. */ SetPageChecked(page); skipped_read = 1; @@ -497,8 +495,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, } /* - * Whee, we aquired budgeting quickly - without involving - * garbage-collection, committing or forceing write-back. We return + * Whee, we acquired budgeting quickly - without involving + * garbage-collection, committing or forcing write-back. We return * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ @@ -562,7 +560,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, /* * Return 0 to force VFS to repeat the whole operation, or the - * error code if 'do_readpage()' failes. + * error code if 'do_readpage()' fails. */ copied = do_readpage(page); goto out; @@ -1175,11 +1173,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* The other attributes may be changed at the same time as well */ + /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); - err = ubifs_jnl_truncate(c, inode, old_size, new_size); mutex_unlock(&ui->ui_mutex); + out_budg: if (budgeted) ubifs_release_budget(c, &req); diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index f0f5f15d384..618c2701d3a 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -529,7 +529,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) * We scan the entire LEB even though we only really need to scan up to * (c->leb_size - lp->free). */ - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) return PTR_ERR(sleb); diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 762a7d6cec7..e589fedaf1e 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -297,7 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); - dbg_io("jhead %d", wbuf->jhead); + dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); @@ -314,7 +314,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) if (wbuf->no_timer) return; - dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, + dbg_io("set timer for jhead %s, %llu-%llu millisecs", + dbg_jhead(wbuf->jhead), div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, USEC_PER_SEC)); @@ -351,8 +352,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) /* Write-buffer is empty or not seeked */ return 0; - dbg_io("LEB %d:%d, %d bytes, jhead %d", - wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); + dbg_io("LEB %d:%d, %d bytes, jhead %s", + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ubifs_assert(!(wbuf->avail & 7)); ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); @@ -401,7 +402,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, { const struct ubifs_info *c = wbuf->c; - dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); + dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); @@ -508,9 +509,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) struct ubifs_info *c = wbuf->c; int err, written, n, aligned_len = ALIGN(len, 8), offs; - dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, - dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, - wbuf->lnum, wbuf->offs + wbuf->used); + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); @@ -535,8 +536,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) memcpy(wbuf->buf + wbuf->used, buf, len); if (aligned_len == wbuf->avail) { - dbg_io("flush jhead %d wbuf to LEB %d:%d", - wbuf->jhead, wbuf->lnum, wbuf->offs); + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -564,8 +565,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * minimal I/O unit. We have to fill and flush write-buffer and switch * to the next min. I/O unit. */ - dbg_io("flush jhead %d wbuf to LEB %d:%d", - wbuf->jhead, wbuf->lnum, wbuf->offs); + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -698,8 +699,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int err, rlen, overlap; struct ubifs_ch *ch = buf; - dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, - dbg_ntype(type), len, wbuf->jhead); + dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, + dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 64b5f3a309f..d321baeca68 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -158,7 +158,7 @@ again: * some. But the write-buffer mutex has to be unlocked because * GC also takes it. */ - dbg_jnl("no free space jhead %d, run GC", jhead); + dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead)); mutex_unlock(&wbuf->io_mutex); lnum = ubifs_garbage_collect(c, 0); @@ -173,7 +173,8 @@ again: * because we dropped @wbuf->io_mutex, so try once * again. */ - dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); + dbg_jnl("GC couldn't make a free LEB for jhead %s", + dbg_jhead(jhead)); if (retries++ < 2) { dbg_jnl("retry (%d)", retries); goto again; @@ -184,7 +185,7 @@ again: } mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); - dbg_jnl("got LEB %d for jhead %d", lnum, jhead); + dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead)); avail = c->leb_size - wbuf->offs - wbuf->used; if (wbuf->lnum != -1 && avail >= len) { @@ -255,7 +256,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len, *lnum = c->jheads[jhead].wbuf.lnum; *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + dbg_jnl("jhead %s, LEB %d:%d, len %d", + dbg_jhead(jhead), *lnum, *offs, len); ubifs_prepare_node(c, node, len, 0); return ubifs_wbuf_write_nolock(wbuf, node, len); @@ -285,7 +287,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, *lnum = c->jheads[jhead].wbuf.lnum; *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + dbg_jnl("jhead %s, LEB %d:%d, len %d", + dbg_jhead(jhead), *lnum, *offs, len); err = ubifs_wbuf_write_nolock(wbuf, buf, len); if (err) diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 5fa27ea031b..0f530c684f0 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -229,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c, } /** - * xent_key_init_hash - initialize extended attribute entry key without - * re-calculating hash function. - * @c: UBIFS file-system description object - * @key: key to initialize - * @inum: host inode number - * @hash: extended attribute entry name hash - */ -static inline void xent_key_init_hash(const struct ubifs_info *c, - union ubifs_key *key, ino_t inum, - uint32_t hash) -{ - ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - key->u32[0] = inum; - key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); -} - -/** * xent_key_init_flash - initialize on-flash extended attribute entry key. * @c: UBIFS file-system description object * @k: key to initialize @@ -295,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c, } /** - * data_key_init_flash - initialize on-flash data key. + * highest_data_key - get the highest possible data key for an inode. * @c: UBIFS file-system description object - * @k: key to initialize + * @key: key to initialize * @inum: inode number - * @block: block number */ -static inline void data_key_init_flash(const struct ubifs_info *c, void *k, - ino_t inum, unsigned int block) +static inline void highest_data_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) { - union ubifs_key *key = k; - - ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); - key->j32[0] = cpu_to_le32(inum); - key->j32[1] = cpu_to_le32(block | - (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); - memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); + data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK); } /** @@ -554,4 +530,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) return 0; } } + #endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 56e33772a1e..c345e125f42 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -169,8 +169,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) */ c->bud_bytes += c->leb_size - bud->start; - dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, - bud->start, bud->jhead, c->bud_bytes); + dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum, + bud->start, dbg_jhead(bud->jhead), c->bud_bytes); spin_unlock(&c->buds_lock); } @@ -355,16 +355,16 @@ static void remove_buds(struct ubifs_info *c) * heads (non-closed buds). */ c->cmt_bud_bytes += wbuf->offs - bud->start; - dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " "cmt_bud_bytes %lld", bud->lnum, bud->start, - bud->jhead, wbuf->offs - bud->start, + dbg_jhead(bud->jhead), wbuf->offs - bud->start, c->cmt_bud_bytes); bud->start = wbuf->offs; } else { c->cmt_bud_bytes += c->leb_size - bud->start; - dbg_log("remove %d:%d, jhead %d, bud bytes %d, " + dbg_log("remove %d:%d, jhead %s, bud bytes %d, " "cmt_bud_bytes %lld", bud->lnum, bud->start, - bud->jhead, c->leb_size - bud->start, + dbg_jhead(bud->jhead), c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); /* @@ -429,7 +429,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) if (lnum == -1 || offs == c->leb_size) continue; - dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); + dbg_log("add ref to LEB %d:%d for jhead %s", + lnum, offs, dbg_jhead(i)); ref = buf + len; ref->ch.node_type = UBIFS_REF_NODE; ref->lnum = cpu_to_le32(lnum); @@ -695,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) lnum = c->ltail_lnum; write_lnum = lnum; while (1) { - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); goto out_free; diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 4cdd284dea5..4d4ca388889 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, case LPROPS_FREE: if (add_to_lpt_heap(c, lprops, cat)) break; - /* No more room on heap so make it uncategorized */ + /* No more room on heap so make it un-categorized */ cat = LPROPS_UNCAT; /* Fall through */ case LPROPS_UNCAT: @@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, * @lprops: LEB properties * * A LEB may have fallen off of the bottom of a heap, and ended up as - * uncategorized even though it has enough space for us now. If that is the case - * this function will put the LEB back onto a heap. + * un-categorized even though it has enough space for us now. If that is the + * case this function will put the LEB back onto a heap. */ void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c, /** * change_category - change LEB properties category. * @c: UBIFS file-system description object - * @lprops: LEB properties to recategorize + * @lprops: LEB properties to re-categorize * * LEB properties are categorized to enable fast find operations. When the LEB - * properties change they must be recategorized. + * properties change they must be re-categorized. */ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -461,21 +461,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) } /** - * calc_dark - calculate LEB dark space size. + * ubifs_calc_dark - calculate LEB dark space size. * @c: the UBIFS file-system description object * @spc: amount of free and dirty space in the LEB * - * This function calculates amount of dark space in an LEB which has @spc bytes - * of free and dirty space. Returns the calculations result. + * This function calculates and returns amount of dark space in an LEB which + * has @spc bytes of free and dirty space. * - * Dark space is the space which is not always usable - it depends on which - * nodes are written in which order. E.g., if an LEB has only 512 free bytes, - * it is dark space, because it cannot fit a large data node. So UBIFS cannot - * count on this LEB and treat these 512 bytes as usable because it is not true - * if, for example, only big chunks of uncompressible data will be written to - * the FS. + * UBIFS is trying to account the space which might not be usable, and this + * space is called "dark space". For example, if an LEB has only %512 free + * bytes, it is dark space, because it cannot fit a large data node. */ -static int calc_dark(struct ubifs_info *c, int spc) +int ubifs_calc_dark(const struct ubifs_info *c, int spc) { ubifs_assert(!(spc & 7)); @@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) * @free: new free space amount * @dirty: new dirty space amount * @flags: new flags - * @idx_gc_cnt: change to the count of idx_gc list + * @idx_gc_cnt: change to the count of @idx_gc list * * This function changes LEB properties (@free, @dirty or @flag). However, the * property which has the %LPROPS_NC value is not changed. Returns a pointer to @@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, { /* * This is the only function that is allowed to change lprops, so we - * discard the const qualifier. + * discard the "const" qualifier. */ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; @@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (old_spc < c->dead_wm) c->lst.total_dead -= old_spc; else - c->lst.total_dark -= calc_dark(c, old_spc); + c->lst.total_dark -= ubifs_calc_dark(c, old_spc); c->lst.total_used -= c->leb_size - old_spc; } @@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (new_spc < c->dead_wm) c->lst.total_dead += new_spc; else - c->lst.total_dark += calc_dark(c, new_spc); + c->lst.total_dark += ubifs_calc_dark(c, new_spc); c->lst.total_used += c->leb_size - new_spc; } @@ -1096,7 +1093,7 @@ static int scan_check_cb(struct ubifs_info *c, } } - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { /* * After an unclean unmount, empty and freeable LEBs @@ -1107,7 +1104,7 @@ static int scan_check_cb(struct ubifs_info *c, "- continuing checking"); lst->empty_lebs += 1; lst->total_free += c->leb_size; - lst->total_dark += calc_dark(c, c->leb_size); + lst->total_dark += ubifs_calc_dark(c, c->leb_size); return LPT_SCAN_CONTINUE; } @@ -1117,7 +1114,7 @@ static int scan_check_cb(struct ubifs_info *c, "- continuing checking"); lst->total_free += lp->free; lst->total_dirty += lp->dirty; - lst->total_dark += calc_dark(c, c->leb_size); + lst->total_dark += ubifs_calc_dark(c, c->leb_size); return LPT_SCAN_CONTINUE; } data->err = PTR_ERR(sleb); @@ -1235,7 +1232,7 @@ static int scan_check_cb(struct ubifs_info *c, if (spc < c->dead_wm) lst->total_dead += spc; else - lst->total_dark += calc_dark(c, spc); + lst->total_dark += ubifs_calc_dark(c, spc); } ubifs_scan_destroy(sleb); diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index a88f33801b9..28beaeedadc 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -29,7 +29,8 @@ * @c: UBIFS file-system description object * * This function scans the master node LEBs and search for the latest master - * node. Returns zero in case of success and a negative error code in case of + * node. Returns zero in case of success, %-EUCLEAN if there master area is + * corrupted and requires recovery, and a negative error code in case of * failure. */ static int scan_for_master(struct ubifs_info *c) @@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c) lnum = UBIFS_MST_LNUM; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); nodes_cnt = sleb->nodes_cnt; @@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c) snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; memcpy(c->mst_node, snod->node, snod->len); offs = snod->offs; } @@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c) lnum += 1; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); if (sleb->nodes_cnt != nodes_cnt) @@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c) goto out; snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; if (snod->offs != offs) goto out; if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, @@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c) out: ubifs_scan_destroy(sleb); + return -EUCLEAN; + +out_dump: + ubifs_err("unexpected node type %d master LEB %d:%d", + snod->type, lnum, snod->offs); + ubifs_scan_destroy(sleb); return -EINVAL; } @@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c) err = scan_for_master(c); if (err) { - err = ubifs_recover_master_node(c); + if (err == -EUCLEAN) + err = ubifs_recover_master_node(c); if (err) /* * Note, we do not free 'c->mst_node' here because the diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 152a7b34a14..82009c74b6a 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -670,9 +670,10 @@ static int kill_orphans(struct ubifs_info *c) struct ubifs_scan_leb *sleb; dbg_rcvry("LEB %d", lnum); - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) { - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (PTR_ERR(sleb) == -EUCLEAN) + sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; @@ -899,7 +900,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { struct ubifs_scan_leb *sleb; - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index e5f6cf8a115..f94ddf7efba 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -286,7 +286,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) mst = mst2; } - dbg_rcvry("recovered master node from LEB %d", + ubifs_msg("recovered master node from LEB %d", (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); @@ -790,7 +790,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * We can only recover at the end of the log, so check that the * next log LEB is empty or out of date. */ - sleb = ubifs_scan(c, next_lnum, 0, sbuf); + sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); if (IS_ERR(sleb)) return sleb; if (sleb->nodes_cnt) { diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 2970500f32d..5c2d6d759a3 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -506,7 +506,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, if (c->need_recovery) sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); else - sleb = ubifs_scan(c, lnum, offs, c->sbuf); + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) return PTR_ERR(sleb); @@ -836,8 +836,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) const struct ubifs_cs_node *node; dbg_mnt("replay log LEB %d:%d", lnum, offs); - sleb = ubifs_scan(c, lnum, offs, sbuf); - if (IS_ERR(sleb) ) { + sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); + if (IS_ERR(sleb)) { if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) return PTR_ERR(sleb); sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 892ebfee4fe..96c52538419 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, /* Make the node pads to 8-byte boundary */ if ((node_len + pad_len) & 7) { - if (!quiet) { + if (!quiet) dbg_err("bad padding length %d - %d", offs, offs + node_len + pad_len); - } return SCANNED_A_BAD_PAD_NODE; } @@ -253,15 +252,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @c: UBIFS file-system description object * @lnum: logical eraseblock number * @offs: offset to start at (usually zero) - * @sbuf: scan buffer (must be c->leb_size) + * @sbuf: scan buffer (must be of @c->leb_size bytes in size) + * @quiet: print no messages * * This function scans LEB number @lnum and returns complete information about * its contents. Returns the scaned information in case of success and, * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case * of failure. + * + * If @quiet is non-zero, this function does not print large and scary + * error messages and flash dumps in case of errors. */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf) + int offs, void *sbuf, int quiet) { void *buf = sbuf + offs; int err, len = c->leb_size - offs; @@ -280,7 +283,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -320,7 +323,9 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, } if (offs % c->min_io_size) { - ubifs_err("empty space starts at non-aligned offset %d", offs); + if (!quiet) + ubifs_err("empty space starts at non-aligned offset %d", + offs); goto corrupted;; } @@ -331,18 +336,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, break; for (; len; offs++, buf++, len--) if (*(uint8_t *)buf != 0xff) { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + if (!quiet) + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); goto corrupted; } return sleb; corrupted: - ubifs_scanned_corruption(c, lnum, offs, buf); + if (!quiet) { + ubifs_scanned_corruption(c, lnum, offs, buf); + ubifs_err("LEB %d scanning failed", lnum); + } err = -EUCLEAN; + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + error: - ubifs_err("LEB %d scanning failed", lnum); + ubifs_err("LEB %d scanning failed, error %d", lnum, err); ubifs_scan_destroy(sleb); return ERR_PTR(err); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c4af069df1a..333e181ee98 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -36,7 +36,6 @@ #include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> -#include <linux/smp_lock.h> #include "ubifs.h" /* @@ -318,6 +317,8 @@ static int ubifs_write_inode(struct inode *inode, int wait) if (err) ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + else + err = dbg_check_inode_size(c, inode, ui->ui_size); } ui->dirty = 0; @@ -448,17 +449,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) return 0; /* - * VFS calls '->sync_fs()' before synchronizing all dirty inodes and - * pages, so synchronize them first, then commit the journal. Strictly - * speaking, it is not necessary to commit the journal here, - * synchronizing write-buffers would be enough. But committing makes - * UBIFS free space predictions much more accurate, so we want to let - * the user be able to get more accurate results of 'statfs()' after - * they synchronize the file system. - */ - sync_inodes_sb(sb); - - /* * Synchronize write buffers, because 'ubifs_run_commit()' does not * do this if it waits for an already running commit. */ @@ -468,6 +458,13 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) return err; } + /* + * Strictly speaking, it is not necessary to commit the journal here, + * synchronizing write-buffers would be enough. But committing makes + * UBIFS free space predictions much more accurate, so we want to let + * the user be able to get more accurate results of 'statfs()' after + * they synchronize the file system. + */ err = ubifs_run_commit(c); if (err) return err; @@ -1720,8 +1717,6 @@ static void ubifs_put_super(struct super_block *sb) ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); - lock_kernel(); - /* * The following asserts are only valid if there has not been a failure * of the media. For example, there will be dirty inodes if we failed @@ -1786,8 +1781,6 @@ static void ubifs_put_super(struct super_block *sb) ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); kfree(c); - - unlock_kernel(); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -1803,22 +1796,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } - lock_kernel(); if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); - unlock_kernel(); return -EROFS; } err = ubifs_remount_rw(c); - if (err) { - unlock_kernel(); + if (err) return err; - } } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); - unlock_kernel(); return -EROFS; } ubifs_remount_ro(c); @@ -1833,7 +1821,6 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } ubifs_assert(c->lst.taken_empty_lebs > 0); - unlock_kernel(); return 0; } diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f249f7b0d65..e5b1a7d00fa 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1159,8 +1159,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, * o exact match, i.e. the found zero-level znode contains key @key, then %1 * is returned and slot number of the matched branch is stored in @n; * o not exact match, which means that zero-level znode does not contain - * @key, then %0 is returned and slot number of the closed branch is stored - * in @n; + * @key, then %0 is returned and slot number of the closest branch is stored + * in @n; * o @key is so small that it is even less than the lowest key of the * leftmost zero-level node, then %0 is returned and %0 is stored in @n. * @@ -1433,7 +1433,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) * @lnum: LEB number is returned here * @offs: offset is returned here * - * This function look up and reads node with key @key. The caller has to make + * This function looks up and reads node with key @key. The caller has to make * sure the @node buffer is large enough to fit the node. Returns zero in case * of success, %-ENOENT if the node was not found, and a negative error code in * case of failure. The node location can be returned in @lnum and @offs. @@ -3268,3 +3268,73 @@ out_unlock: mutex_unlock(&c->tnc_mutex); return err; } + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_inode_size - check if inode size is correct. + * @c: UBIFS file-system description object + * @inum: inode number + * @size: inode size + * + * This function makes sure that the inode size (@size) is correct and it does + * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL + * if it has a data page beyond @size, and other negative error code in case of + * other errors. + */ +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size) +{ + int err, n; + union ubifs_key from_key, to_key, *key; + struct ubifs_znode *znode; + unsigned int block; + + if (!S_ISREG(inode->i_mode)) + return 0; + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &from_key, inode->i_ino, block); + highest_data_key(c, &to_key, inode->i_ino); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, &from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + err = -EINVAL; + key = &from_key; + goto out_dump; + } + + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + + ubifs_assert(err == 0); + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, &from_key, &to_key)) + goto out_unlock; + +out_dump: + block = key_block(c, key); + ubifs_err("inode %lu has size %lld, but there are data at offset %lld " + "(data key %s)", (unsigned long)inode->i_ino, size, + ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); + dbg_dump_inode(c, inode); + dbg_dump_stack(); + err = -EINVAL; + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index fde8d127c76..53288e5d604 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p) * it is more comprehensive and less efficient than is needed for this * purpose. */ - sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); + sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0); c->ileb_len = 0; if (IS_ERR(sleb)) return PTR_ERR(sleb); diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 3eee07e0c49..191ca7863fe 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -135,6 +135,13 @@ /* The key is always at the same position in all keyed nodes */ #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) +/* Garbage collector journal head number */ +#define UBIFS_GC_HEAD 0 +/* Base journal head number */ +#define UBIFS_BASE_HEAD 1 +/* Data journal head number */ +#define UBIFS_DATA_HEAD 2 + /* * LEB Properties Tree node types. * diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a2934909442..b2d976366a4 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -105,12 +105,10 @@ /* Number of non-data journal heads */ #define NONDATA_JHEADS_CNT 2 -/* Garbage collector head */ -#define GCHD 0 -/* Base journal head number */ -#define BASEHD 1 -/* First "general purpose" journal head */ -#define DATAHD 2 +/* Shorter names for journal head numbers for internal usage */ +#define GCHD UBIFS_GC_HEAD +#define BASEHD UBIFS_BASE_HEAD +#define DATAHD UBIFS_DATA_HEAD /* 'No change' value for 'ubifs_change_lp()' */ #define LPROPS_NC 0x80000001 @@ -1451,7 +1449,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); /* scan.c */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf); + int offs, void *sbuf, int quiet); void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, int offs, int quiet); @@ -1676,6 +1674,7 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); +int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index adafcf55653..195830f4756 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -78,9 +78,9 @@ enum { SECURITY_XATTR, }; -static struct inode_operations none_inode_operations; -static struct address_space_operations none_address_operations; -static struct file_operations none_file_operations; +static const struct inode_operations none_inode_operations; +static const struct address_space_operations none_address_operations; +static const struct file_operations none_file_operations; /** * create_xattr - create an extended attribute. diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index cb6e2cca214..9e41f91aa26 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -150,7 +150,7 @@ xfs_fs_set_xquota( return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); } -struct quotactl_ops xfs_quotactl_operations = { +const struct quotactl_ops xfs_quotactl_operations = { .quota_sync = xfs_fs_quota_sync, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5d7c60ac77b..bdd41c8c342 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -67,7 +67,7 @@ #include <linux/freezer.h> #include <linux/parser.h> -static struct super_operations xfs_super_operations; +static const struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -1536,7 +1536,7 @@ xfs_fs_get_sb( mnt); } -static struct super_operations xfs_super_operations = { +static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, .write_inode = xfs_fs_write_inode, diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 5a2ea3a2178..18175ebd58e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; extern struct xattr_handler *xfs_xattr_handlers[]; -extern struct quotactl_ops xfs_quotactl_operations; +extern const struct quotactl_ops xfs_quotactl_operations; #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c4ea51b55dc..f52ac276277 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -117,7 +117,7 @@ struct getbmapx { #define BMV_IF_VALID \ (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) -/* bmv_oflags values - returned for for each non-header segment */ +/* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ #define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ #define BMV_OF_LAST 0x4 /* segment is the last in the file */ |