diff options
Diffstat (limited to 'fs')
56 files changed, 1443 insertions, 744 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 17c9c5ec14c..801db134181 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -25,7 +25,7 @@ config BINFMT_ELF config COMPAT_BINFMT_ELF bool - depends on COMPAT && MMU + depends on COMPAT && BINFMT_ELF config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" diff --git a/fs/Makefile b/fs/Makefile index b6f27dc26b7..d0c69f57e5b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -8,7 +8,7 @@ obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ - attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ + attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ stack.o @@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o +obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o diff --git a/fs/afs/file.c b/fs/afs/file.c index 525f7c56e06..a3901769a96 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -50,8 +50,8 @@ const struct address_space_operations afs_fs_aops = { .launder_page = afs_launder_page, .releasepage = afs_releasepage, .invalidatepage = afs_invalidatepage, - .prepare_write = afs_prepare_write, - .commit_write = afs_commit_write, + .write_begin = afs_write_begin, + .write_end = afs_write_end, .writepage = afs_writepage, .writepages = afs_writepages, }; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3cb6920ff30..67f259d99cd 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -728,8 +728,12 @@ extern int afs_volume_release_fileserver(struct afs_vnode *, */ extern int afs_set_page_dirty(struct page *); extern void afs_put_writeback(struct afs_writeback *); -extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); -extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); +extern int afs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); +extern int afs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern int afs_write_inode(struct inode *, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index 065b4e10681..d6b85dab35f 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,15 +84,23 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - unsigned start, unsigned len, struct page *page) + loff_t pos, unsigned len, struct page *page) { + loff_t i_size; + unsigned eof; int ret; - _enter(",,%u,%u", start, len); + _enter(",,%llu,%u", (unsigned long long)pos, len); - ASSERTCMP(start + len, <=, PAGE_SIZE); + ASSERTCMP(len, <=, PAGE_CACHE_SIZE); - ret = afs_vnode_fetch_data(vnode, key, start, len, page); + i_size = i_size_read(&vnode->vfs_inode); + if (pos + len > i_size) + eof = i_size; + else + eof = PAGE_CACHE_SIZE; + + ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" @@ -107,109 +115,55 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, } /* - * prepare a page for being written to - */ -static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, - struct key *key, unsigned offset, unsigned to) -{ - unsigned eof, tail, start, stop, len; - loff_t i_size, pos; - void *p; - int ret; - - _enter(""); - - if (offset == 0 && to == PAGE_SIZE) - return 0; - - p = kmap_atomic(page, KM_USER0); - - i_size = i_size_read(&vnode->vfs_inode); - pos = (loff_t) page->index << PAGE_SHIFT; - if (pos >= i_size) { - /* partial write, page beyond EOF */ - _debug("beyond"); - if (offset > 0) - memset(p, 0, offset); - if (to < PAGE_SIZE) - memset(p + to, 0, PAGE_SIZE - to); - kunmap_atomic(p, KM_USER0); - return 0; - } - - if (i_size - pos >= PAGE_SIZE) { - /* partial write, page entirely before EOF */ - _debug("before"); - tail = eof = PAGE_SIZE; - } else { - /* partial write, page overlaps EOF */ - eof = i_size - pos; - _debug("overlap %u", eof); - tail = max(eof, to); - if (tail < PAGE_SIZE) - memset(p + tail, 0, PAGE_SIZE - tail); - if (offset > eof) - memset(p + eof, 0, PAGE_SIZE - eof); - } - - kunmap_atomic(p, KM_USER0); - - ret = 0; - if (offset > 0 || eof > to) { - /* need to fill one or two bits that aren't going to be written - * (cover both fillers in one read if there are two) */ - start = (offset > 0) ? 0 : to; - stop = (eof > to) ? eof : offset; - len = stop - start; - _debug("wr=%u-%u av=0-%u rd=%u@%u", - offset, to, eof, start, len); - ret = afs_fill_page(vnode, key, start, len, page); - } - - _leave(" = %d", ret); - return ret; -} - -/* * prepare to perform part of a write to a page - * - the caller holds the page locked, preventing it from being written out or - * modified by anyone else */ -int afs_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +int afs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { struct afs_writeback *candidate, *wb; struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct page *page; struct key *key = file->private_data; - pgoff_t index; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + len; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", - vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + vnode->fid.vid, vnode->fid.vnode, index, from, to); candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); if (!candidate) return -ENOMEM; candidate->vnode = vnode; - candidate->first = candidate->last = page->index; - candidate->offset_first = offset; + candidate->first = candidate->last = index; + candidate->offset_first = from; candidate->to_last = to; candidate->usage = 1; candidate->state = AFS_WBACK_PENDING; init_waitqueue_head(&candidate->waitq); + page = __grab_cache_page(mapping, index); + if (!page) { + kfree(candidate); + return -ENOMEM; + } + *pagep = page; + /* page won't leak in error case: it eventually gets cleaned off LRU */ + if (!PageUptodate(page)) { _debug("not up to date"); - ret = afs_prepare_page(vnode, page, key, offset, to); + ret = afs_fill_page(vnode, key, pos, len, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); return ret; } + SetPageUptodate(page); } try_again: - index = page->index; spin_lock(&vnode->writeback_lock); /* see if this page is already pending a writeback under a suitable key @@ -242,8 +196,8 @@ try_again: subsume_in_current_wb: _debug("subsume"); ASSERTRANGE(wb->first, <=, index, <=, wb->last); - if (index == wb->first && offset < wb->offset_first) - wb->offset_first = offset; + if (index == wb->first && from < wb->offset_first) + wb->offset_first = from; if (index == wb->last && to > wb->to_last) wb->to_last = to; spin_unlock(&vnode->writeback_lock); @@ -289,17 +243,17 @@ flush_conflicting_wb: /* * finalise part of a write to a page */ -int afs_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +int afs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); loff_t i_size, maybe_i_size; - _enter("{%x:%u},{%lx},%u,%u", - vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + _enter("{%x:%u},{%lx}", + vnode->fid.vid, vnode->fid.vnode, page->index); - maybe_i_size = (loff_t) page->index << PAGE_SHIFT; - maybe_i_size += to; + maybe_i_size = pos + copied; i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { @@ -310,12 +264,13 @@ int afs_commit_write(struct file *file, struct page *page, spin_unlock(&vnode->writeback_lock); } - SetPageUptodate(page); set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); + unlock_page(page); + page_cache_release(page); - return 0; + return copied; } /* diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile index f2c3b79e94d..a811c1f7d9a 100644 --- a/fs/autofs4/Makefile +++ b/fs/autofs4/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4.o -autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o +autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 69a2f5c9231..e0f16da00e5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -14,6 +14,7 @@ /* Internal header file for autofs */ #include <linux/auto_fs4.h> +#include <linux/auto_dev-ioctl.h> #include <linux/mutex.h> #include <linux/list.h> @@ -21,6 +22,11 @@ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY #define AUTOFS_IOC_COUNT 32 +#define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) +#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) + +#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET) + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/time.h> @@ -35,11 +41,27 @@ /* #define DEBUG */ #ifdef DEBUG -#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) +#define DPRINTK(fmt, args...) \ +do { \ + printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \ + current->pid, __func__, ##args); \ +} while (0) #else -#define DPRINTK(fmt,args...) do {} while(0) +#define DPRINTK(fmt, args...) do {} while (0) #endif +#define AUTOFS_WARN(fmt, args...) \ +do { \ + printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ + current->pid, __func__, ##args); \ +} while (0) + +#define AUTOFS_ERROR(fmt, args...) \ +do { \ + printk(KERN_ERR "pid %d: %s: " fmt "\n", \ + current->pid, __func__, ##args); \ +} while (0) + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never @@ -61,6 +83,9 @@ struct autofs_info { unsigned long last_used; atomic_t count; + uid_t uid; + gid_t gid; + mode_t mode; size_t size; @@ -92,10 +117,6 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d -#define AUTOFS_TYPE_INDIRECT 0x0001 -#define AUTOFS_TYPE_DIRECT 0x0002 -#define AUTOFS_TYPE_OFFSET 0x0004 - struct autofs_sb_info { u32 magic; int pipefd; @@ -169,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *, struct autofs_packet_expire __user *); int autofs4_expire_multi(struct super_block *, struct vfsmount *, struct autofs_sb_info *, int __user *); +struct dentry *autofs4_expire_direct(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, int how); +struct dentry *autofs4_expire_indirect(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, int how); + +/* Device node initialization */ + +int autofs_dev_ioctl_init(void); +void autofs_dev_ioctl_exit(void); /* Operations structures */ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c new file mode 100644 index 00000000000..625abf5422e --- /dev/null +++ b/fs/autofs4/dev-ioctl.c @@ -0,0 +1,863 @@ +/* + * Copyright 2008 Red Hat, Inc. All rights reserved. + * Copyright 2008 Ian Kent <raven@themaw.net> + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + */ + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/miscdevice.h> +#include <linux/init.h> +#include <linux/wait.h> +#include <linux/namei.h> +#include <linux/fcntl.h> +#include <linux/file.h> +#include <linux/fdtable.h> +#include <linux/sched.h> +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/smp_lock.h> +#include <linux/magic.h> +#include <linux/dcache.h> +#include <linux/uaccess.h> + +#include "autofs_i.h" + +/* + * This module implements an interface for routing autofs ioctl control + * commands via a miscellaneous device file. + * + * The alternate interface is needed because we need to be able open + * an ioctl file descriptor on an autofs mount that may be covered by + * another mount. This situation arises when starting automount(8) + * or other user space daemon which uses direct mounts or offset + * mounts (used for autofs lazy mount/umount of nested mount trees), + * which have been left busy at at service shutdown. + */ + +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) + +typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, + struct autofs_dev_ioctl *); + +static int check_name(const char *name) +{ + if (!strchr(name, '/')) + return -EINVAL; + return 0; +} + +/* + * Check a string doesn't overrun the chunk of + * memory we copied from user land. + */ +static int invalid_str(char *str, void *end) +{ + while ((void *) str <= end) + if (!*str++) + return 0; + return -EINVAL; +} + +/* + * Check that the user compiled against correct version of autofs + * misc device code. + * + * As well as checking the version compatibility this always copies + * the kernel interface version out. + */ +static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) +{ + int err = 0; + + if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) || + (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) { + AUTOFS_WARN("ioctl control interface version mismatch: " + "kernel(%u.%u), user(%u.%u), cmd(%d)", + AUTOFS_DEV_IOCTL_VERSION_MAJOR, + AUTOFS_DEV_IOCTL_VERSION_MINOR, + param->ver_major, param->ver_minor, cmd); + err = -EINVAL; + } + + /* Fill in the kernel version. */ + param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; + param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; + + return err; +} + +/* + * Copy parameter control struct, including a possible path allocated + * at the end of the struct. + */ +static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) +{ + struct autofs_dev_ioctl tmp, *ads; + + if (copy_from_user(&tmp, in, sizeof(tmp))) + return ERR_PTR(-EFAULT); + + if (tmp.size < sizeof(tmp)) + return ERR_PTR(-EINVAL); + + ads = kmalloc(tmp.size, GFP_KERNEL); + if (!ads) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(ads, in, tmp.size)) { + kfree(ads); + return ERR_PTR(-EFAULT); + } + + return ads; +} + +static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) +{ + kfree(param); + return; +} + +/* + * Check sanity of parameter control fields and if a path is present + * check that it has a "/" and is terminated. + */ +static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) +{ + int err = -EINVAL; + + if (check_dev_ioctl_version(cmd, param)) { + AUTOFS_WARN("invalid device control module version " + "supplied for cmd(0x%08x)", cmd); + goto out; + } + + if (param->size > sizeof(*param)) { + err = check_name(param->path); + if (err) { + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", + cmd); + goto out; + } + + err = invalid_str(param->path, + (void *) ((size_t) param + param->size)); + if (err) { + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", + cmd); + goto out; + } + } + + err = 0; +out: + return err; +} + +/* + * Get the autofs super block info struct from the file opened on + * the autofs mount point. + */ +static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) +{ + struct autofs_sb_info *sbi = NULL; + struct inode *inode; + + if (f) { + inode = f->f_path.dentry->d_inode; + sbi = autofs4_sbi(inode->i_sb); + } + return sbi; +} + +/* Return autofs module protocol version */ +static int autofs_dev_ioctl_protover(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + param->arg1 = sbi->version; + return 0; +} + +/* Return autofs module protocol sub version */ +static int autofs_dev_ioctl_protosubver(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + param->arg1 = sbi->sub_version; + return 0; +} + +/* + * Walk down the mount stack looking for an autofs mount that + * has the requested device number (aka. new_encode_dev(sb->s_dev). + */ +static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno) +{ + struct dentry *dentry; + struct inode *inode; + struct super_block *sb; + dev_t s_dev; + unsigned int err; + + err = -ENOENT; + + /* Lookup the dentry name at the base of our mount point */ + dentry = d_lookup(nd->path.dentry, &nd->last); + if (!dentry) + goto out; + + dput(nd->path.dentry); + nd->path.dentry = dentry; + + /* And follow the mount stack looking for our autofs mount */ + while (follow_down(&nd->path.mnt, &nd->path.dentry)) { + inode = nd->path.dentry->d_inode; + if (!inode) + break; + + sb = inode->i_sb; + s_dev = new_encode_dev(sb->s_dev); + if (devno == s_dev) { + if (sb->s_magic == AUTOFS_SUPER_MAGIC) { + err = 0; + break; + } + } + } +out: + return err; +} + +/* + * Walk down the mount stack looking for an autofs mount that + * has the requested mount type (ie. indirect, direct or offset). + */ +static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type) +{ + struct dentry *dentry; + struct autofs_info *ino; + unsigned int err; + + err = -ENOENT; + + /* Lookup the dentry name at the base of our mount point */ + dentry = d_lookup(nd->path.dentry, &nd->last); + if (!dentry) + goto out; + + dput(nd->path.dentry); + nd->path.dentry = dentry; + + /* And follow the mount stack looking for our autofs mount */ + while (follow_down(&nd->path.mnt, &nd->path.dentry)) { + ino = autofs4_dentry_ino(nd->path.dentry); + if (ino && ino->sbi->type & type) { + err = 0; + break; + } + } +out: + return err; +} + +static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + BUG_ON(fdt->fd[fd] != NULL); + rcu_assign_pointer(fdt->fd[fd], file); + FD_SET(fd, fdt->close_on_exec); + spin_unlock(&files->file_lock); +} + + +/* + * Open a file descriptor on the autofs mount point corresponding + * to the given path and device number (aka. new_encode_dev(sb->s_dev)). + */ +static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) +{ + struct file *filp; + struct nameidata nd; + int err, fd; + + fd = get_unused_fd(); + if (likely(fd >= 0)) { + /* Get nameidata of the parent directory */ + err = path_lookup(path, LOOKUP_PARENT, &nd); + if (err) + goto out; + + /* + * Search down, within the parent, looking for an + * autofs super block that has the device number + * corresponding to the autofs fs we want to open. + */ + err = autofs_dev_ioctl_find_super(&nd, devid); + if (err) { + path_put(&nd.path); + goto out; + } + + filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + + autofs_dev_ioctl_fd_install(fd, filp); + } + + return fd; + +out: + put_unused_fd(fd); + return err; +} + +/* Open a file descriptor on an autofs mount point */ +static int autofs_dev_ioctl_openmount(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + const char *path; + dev_t devid; + int err, fd; + + /* param->path has already been checked */ + if (!param->arg1) + return -EINVAL; + + param->ioctlfd = -1; + + path = param->path; + devid = param->arg1; + + err = 0; + fd = autofs_dev_ioctl_open_mountpoint(path, devid); + if (unlikely(fd < 0)) { + err = fd; + goto out; + } + + param->ioctlfd = fd; +out: + return err; +} + +/* Close file descriptor allocated above (user can also use close(2)). */ +static int autofs_dev_ioctl_closemount(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + return sys_close(param->ioctlfd); +} + +/* + * Send "ready" status for an existing wait (either a mount or an expire + * request). + */ +static int autofs_dev_ioctl_ready(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + autofs_wqt_t token; + + token = (autofs_wqt_t) param->arg1; + return autofs4_wait_release(sbi, token, 0); +} + +/* + * Send "fail" status for an existing wait (either a mount or an expire + * request). + */ +static int autofs_dev_ioctl_fail(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + autofs_wqt_t token; + int status; + + token = (autofs_wqt_t) param->arg1; + status = param->arg2 ? param->arg2 : -ENOENT; + return autofs4_wait_release(sbi, token, status); +} + +/* + * Set the pipe fd for kernel communication to the daemon. + * + * Normally this is set at mount using an option but if we + * are reconnecting to a busy mount then we need to use this + * to tell the autofs mount about the new kernel pipe fd. In + * order to protect mounts against incorrectly setting the + * pipefd we also require that the autofs mount be catatonic. + * + * This also sets the process group id used to identify the + * controlling process (eg. the owning automount(8) daemon). + */ +static int autofs_dev_ioctl_setpipefd(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + int pipefd; + int err = 0; + + if (param->arg1 == -1) + return -EINVAL; + + pipefd = param->arg1; + + mutex_lock(&sbi->wq_mutex); + if (!sbi->catatonic) { + mutex_unlock(&sbi->wq_mutex); + return -EBUSY; + } else { + struct file *pipe = fget(pipefd); + if (!pipe->f_op || !pipe->f_op->write) { + err = -EPIPE; + fput(pipe); + goto out; + } + sbi->oz_pgrp = task_pgrp_nr(current); + sbi->pipefd = pipefd; + sbi->pipe = pipe; + sbi->catatonic = 0; + } +out: + mutex_unlock(&sbi->wq_mutex); + return err; +} + +/* + * Make the autofs mount point catatonic, no longer responsive to + * mount requests. Also closes the kernel pipe file descriptor. + */ +static int autofs_dev_ioctl_catatonic(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + autofs4_catatonic_mode(sbi); + return 0; +} + +/* Set the autofs mount timeout */ +static int autofs_dev_ioctl_timeout(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + unsigned long timeout; + + timeout = param->arg1; + param->arg1 = sbi->exp_timeout / HZ; + sbi->exp_timeout = timeout * HZ; + return 0; +} + +/* + * Return the uid and gid of the last request for the mount + * + * When reconstructing an autofs mount tree with active mounts + * we need to re-connect to mounts that may have used the original + * process uid and gid (or string variations of them) for mount + * lookups within the map entry. + */ +static int autofs_dev_ioctl_requester(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + struct autofs_info *ino; + struct nameidata nd; + const char *path; + dev_t devid; + int err = -ENOENT; + + if (param->size <= sizeof(*param)) { + err = -EINVAL; + goto out; + } + + path = param->path; + devid = sbi->sb->s_dev; + + param->arg1 = param->arg2 = -1; + + /* Get nameidata of the parent directory */ + err = path_lookup(path, LOOKUP_PARENT, &nd); + if (err) + goto out; + + err = autofs_dev_ioctl_find_super(&nd, devid); + if (err) + goto out_release; + + ino = autofs4_dentry_ino(nd.path.dentry); + if (ino) { + err = 0; + autofs4_expire_wait(nd.path.dentry); + spin_lock(&sbi->fs_lock); + param->arg1 = ino->uid; + param->arg2 = ino->gid; + spin_unlock(&sbi->fs_lock); + } + +out_release: + path_put(&nd.path); +out: + return err; +} + +/* + * Call repeatedly until it returns -EAGAIN, meaning there's nothing + * more that can be done. + */ +static int autofs_dev_ioctl_expire(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + struct dentry *dentry; + struct vfsmount *mnt; + int err = -EAGAIN; + int how; + + how = param->arg1; + mnt = fp->f_path.mnt; + + if (sbi->type & AUTOFS_TYPE_TRIGGER) + dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); + else + dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); + + if (dentry) { + struct autofs_info *ino = autofs4_dentry_ino(dentry); + + /* + * This is synchronous because it makes the daemon a + * little easier + */ + err = autofs4_wait(sbi, dentry, NFY_EXPIRE); + + spin_lock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_MOUNTPOINT) { + ino->flags &= ~AUTOFS_INF_MOUNTPOINT; + sbi->sb->s_root->d_mounted++; + } + ino->flags &= ~AUTOFS_INF_EXPIRING; + complete_all(&ino->expire_complete); + spin_unlock(&sbi->fs_lock); + dput(dentry); + } + + return err; +} + +/* Check if autofs mount point is in use */ +static int autofs_dev_ioctl_askumount(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + param->arg1 = 0; + if (may_umount(fp->f_path.mnt)) + param->arg1 = 1; + return 0; +} + +/* + * Check if the given path is a mountpoint. + * + * If we are supplied with the file descriptor of an autofs + * mount we're looking for a specific mount. In this case + * the path is considered a mountpoint if it is itself a + * mountpoint or contains a mount, such as a multi-mount + * without a root mount. In this case we return 1 if the + * path is a mount point and the super magic of the covering + * mount if there is one or 0 if it isn't a mountpoint. + * + * If we aren't supplied with a file descriptor then we + * lookup the nameidata of the path and check if it is the + * root of a mount. If a type is given we are looking for + * a particular autofs mount and if we don't find a match + * we return fail. If the located nameidata path is the + * root of a mount we return 1 along with the super magic + * of the mount or 0 otherwise. + * + * In both cases the the device number (as returned by + * new_encode_dev()) is also returned. + */ +static int autofs_dev_ioctl_ismountpoint(struct file *fp, + struct autofs_sb_info *sbi, + struct autofs_dev_ioctl *param) +{ + struct nameidata nd; + const char *path; + unsigned int type; + int err = -ENOENT; + + if (param->size <= sizeof(*param)) { + err = -EINVAL; + goto out; + } + + path = param->path; + type = param->arg1; + + param->arg1 = 0; + param->arg2 = 0; + + if (!fp || param->ioctlfd == -1) { + if (type == AUTOFS_TYPE_ANY) { + struct super_block *sb; + + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + goto out; + + sb = nd.path.dentry->d_sb; + param->arg1 = new_encode_dev(sb->s_dev); + } else { + struct autofs_info *ino; + + err = path_lookup(path, LOOKUP_PARENT, &nd); + if (err) + goto out; + + err = autofs_dev_ioctl_find_sbi_type(&nd, type); + if (err) + goto out_release; + + ino = autofs4_dentry_ino(nd.path.dentry); + param->arg1 = autofs4_get_dev(ino->sbi); + } + + err = 0; + if (nd.path.dentry->d_inode && + nd.path.mnt->mnt_root == nd.path.dentry) { + err = 1; + param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic; + } + } else { + dev_t devid = new_encode_dev(sbi->sb->s_dev); + + err = path_lookup(path, LOOKUP_PARENT, &nd); + if (err) + goto out; + + err = autofs_dev_ioctl_find_super(&nd, devid); + if (err) + goto out_release; + + param->arg1 = autofs4_get_dev(sbi); + + err = have_submounts(nd.path.dentry); + + if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { + if (follow_down(&nd.path.mnt, &nd.path.dentry)) { + struct inode *inode = nd.path.dentry->d_inode; + param->arg2 = inode->i_sb->s_magic; + } + } + } + +out_release: + path_put(&nd.path); +out: + return err; +} + +/* + * Our range of ioctl numbers isn't 0 based so we need to shift + * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table + * lookup. + */ +#define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST)) + +static ioctl_fn lookup_dev_ioctl(unsigned int cmd) +{ + static struct { + int cmd; + ioctl_fn fn; + } _ioctls[] = { + {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL}, + {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD), + autofs_dev_ioctl_protover}, + {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD), + autofs_dev_ioctl_protosubver}, + {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD), + autofs_dev_ioctl_openmount}, + {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD), + autofs_dev_ioctl_closemount}, + {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD), + autofs_dev_ioctl_ready}, + {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD), + autofs_dev_ioctl_fail}, + {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD), + autofs_dev_ioctl_setpipefd}, + {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD), + autofs_dev_ioctl_catatonic}, + {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD), + autofs_dev_ioctl_timeout}, + {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD), + autofs_dev_ioctl_requester}, + {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD), + autofs_dev_ioctl_expire}, + {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD), + autofs_dev_ioctl_askumount}, + {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD), + autofs_dev_ioctl_ismountpoint} + }; + unsigned int idx = cmd_idx(cmd); + + return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn; +} + +/* ioctl dispatcher */ +static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) +{ + struct autofs_dev_ioctl *param; + struct file *fp; + struct autofs_sb_info *sbi; + unsigned int cmd_first, cmd; + ioctl_fn fn = NULL; + int err = 0; + + /* only root can play with this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); + cmd = _IOC_NR(command); + + if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || + cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) { + return -ENOTTY; + } + + /* Copy the parameters into kernel space. */ + param = copy_dev_ioctl(user); + if (IS_ERR(param)) + return PTR_ERR(param); + + err = validate_dev_ioctl(command, param); + if (err) + goto out; + + /* The validate routine above always sets the version */ + if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD) + goto done; + + fn = lookup_dev_ioctl(cmd); + if (!fn) { + AUTOFS_WARN("unknown command 0x%08x", command); + return -ENOTTY; + } + + fp = NULL; + sbi = NULL; + + /* + * For obvious reasons the openmount can't have a file + * descriptor yet. We don't take a reference to the + * file during close to allow for immediate release. + */ + if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && + cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { + fp = fget(param->ioctlfd); + if (!fp) { + if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) + goto cont; + err = -EBADF; + goto out; + } + + if (!fp->f_op) { + err = -ENOTTY; + fput(fp); + goto out; + } + + sbi = autofs_dev_ioctl_sbi(fp); + if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { + err = -EINVAL; + fput(fp); + goto out; + } + + /* + * Admin needs to be able to set the mount catatonic in + * order to be able to perform the re-open. + */ + if (!autofs4_oz_mode(sbi) && + cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { + err = -EACCES; + fput(fp); + goto out; + } + } +cont: + err = fn(fp, sbi, param); + + if (fp) + fput(fp); +done: + if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) + err = -EFAULT; +out: + free_dev_ioctl(param); + return err; +} + +static long autofs_dev_ioctl(struct file *file, uint command, ulong u) +{ + int err; + err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); + return (long) err; +} + +#ifdef CONFIG_COMPAT +static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u) +{ + return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u)); +} +#else +#define autofs_dev_ioctl_compat NULL +#endif + +static const struct file_operations _dev_ioctl_fops = { + .unlocked_ioctl = autofs_dev_ioctl, + .compat_ioctl = autofs_dev_ioctl_compat, + .owner = THIS_MODULE, +}; + +static struct miscdevice _autofs_dev_ioctl_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = AUTOFS_DEVICE_NAME, + .fops = &_dev_ioctl_fops +}; + +/* Register/deregister misc character device */ +int autofs_dev_ioctl_init(void) +{ + int r; + + r = misc_register(&_autofs_dev_ioctl_misc); + if (r) { + AUTOFS_ERROR("misc_register failed for control device"); + return r; + } + + return 0; +} + +void autofs_dev_ioctl_exit(void) +{ + misc_deregister(&_autofs_dev_ioctl_misc); + return; +} + diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cdabb796ff0..cde2f8e8935 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -244,10 +244,10 @@ cont: } /* Check if we can expire a direct mount (possibly a tree) */ -static struct dentry *autofs4_expire_direct(struct super_block *sb, - struct vfsmount *mnt, - struct autofs_sb_info *sbi, - int how) +struct dentry *autofs4_expire_direct(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) { unsigned long timeout; struct dentry *root = dget(sb->s_root); @@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, * - it is unused by any user process * - it has been unused for exp_timeout time */ -static struct dentry *autofs4_expire_indirect(struct super_block *sb, - struct vfsmount *mnt, - struct autofs_sb_info *sbi, - int how) +struct dentry *autofs4_expire_indirect(struct super_block *sb, + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + int how) { unsigned long timeout; struct dentry *root = sb->s_root; @@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if (sbi->type & AUTOFS_TYPE_DIRECT) + if (sbi->type & AUTOFS_TYPE_TRIGGER) dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); else dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 723a1c5e361..9722e4bd895 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = { static int __init init_autofs4_fs(void) { - return register_filesystem(&autofs_fs_type); + int err; + + err = register_filesystem(&autofs_fs_type); + if (err) + return err; + + autofs_dev_ioctl_init(); + + return err; } static void __exit exit_autofs4_fs(void) { + autofs_dev_ioctl_exit(); unregister_filesystem(&autofs_fs_type); } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 45d55819203..c7e65bb30ba 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, atomic_set(&ino->count, 0); } + ino->uid = 0; + ino->gid = 0; ino->mode = mode; ino->last_used = jiffies; @@ -288,7 +290,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *type = AUTOFS_TYPE_DIRECT; break; case Opt_offset: - *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; + *type = AUTOFS_TYPE_OFFSET; break; default: return 1; @@ -336,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; - sbi->type = 0; + sbi->type = AUTOFS_TYPE_INDIRECT; sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); @@ -378,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode->i_fop = &autofs4_root_operations; - root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? + root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ? &autofs4_direct_root_inode_operations : &autofs4_indirect_root_inode_operations; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35216d18d8b..4b67c2a2d77 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, * is very similar for indirect mounts except only dentrys * in the root of the autofs file system may be negative. */ - if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)) + if (sbi->type & AUTOFS_TYPE_TRIGGER) return -ENOENT; else if (!IS_ROOT(dentry->d_parent)) return -ENOENT; @@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -ENOMEM; /* If this is a direct mount request create a dummy name */ - if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) + if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER) qstr.len = sprintf(name, "%p", dentry); else { qstr.len = autofs4_getpath(sbi, dentry, &name); @@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, type = autofs_ptype_expire_multi; } else { if (notify == NFY_MOUNT) - type = (sbi->type & AUTOFS_TYPE_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? autofs_ptype_missing_direct : autofs_ptype_missing_indirect; else - type = (sbi->type & AUTOFS_TYPE_DIRECT) ? + type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? autofs_ptype_expire_direct : autofs_ptype_expire_indirect; } @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, status = wq->status; + /* + * For direct and offset mounts we need to track the requester's + * uid and gid in the dentry info struct. This is so it can be + * supplied, on request, by the misc device ioctl interface. + * This is needed during daemon resatart when reconnecting + * to existing, active, autofs mounts. The uid and gid (and + * related string values) may be used for macro substitution + * in autofs mount maps. + */ + if (!status) { + struct autofs_info *ino; + struct dentry *de = NULL; + + /* direct mount or browsable map */ + ino = autofs4_dentry_ino(dentry); + if (!ino) { + /* If not lookup actual dentry used */ + de = d_lookup(dentry->d_parent, &dentry->d_name); + if (de) + ino = autofs4_dentry_ino(de); + } + + /* Set mount requester */ + if (ino) { + spin_lock(&sbi->fs_lock); + ino->uid = wq->uid; + ino->gid = wq->gid; + spin_unlock(&sbi->fs_lock); + } + + if (de) + dput(de); + } + /* Are we the last process to need status? */ mutex_lock(&sbi->wq_mutex); if (!--wq->wait_ctr) diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index e2595c2c403..7893eaa1e58 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h @@ -55,8 +55,12 @@ enum super_flags { }; #define BEFS_BYTEORDER_NATIVE 0x42494745 +#define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE) +#define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE) #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 +#define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1) +#define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1) /* * Flags of inode diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9286b2af893..b6dfee37c7b 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -809,8 +809,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent) /* account for offset of super block on x86 */ disk_sb = (befs_super_block *) bh->b_data; - if ((le32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1) || - (be32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1)) { + if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) || + (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) { befs_debug(sb, "Using PPC superblock location"); } else { befs_debug(sb, "Using x86 superblock location"); diff --git a/fs/befs/super.c b/fs/befs/super.c index 8c3401ff6d6..41f2b4d0093 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c @@ -26,10 +26,10 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb) befs_sb_info *befs_sb = BEFS_SB(sb); /* Check the byte order of the filesystem */ - if (le32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) + if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE) befs_sb->byte_order = BEFS_BYTESEX_LE; - else if (be32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) - befs_sb->byte_order = BEFS_BYTESEX_BE; + else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE) + befs_sb->byte_order = BEFS_BYTESEX_BE; befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a8..c76afa26edf 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -683,7 +683,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * switch really is going to happen - do this in * flush_thread(). - akpm */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); @@ -734,7 +734,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_dentry; } else { /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -748,7 +748,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ - SET_PERSONALITY(loc->elf_ex, 0); + SET_PERSONALITY(loc->elf_ex); if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 80c1f952ef7..0e8367c5462 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -25,6 +25,7 @@ #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/security.h> #include <linux/highmem.h> #include <linux/highuid.h> #include <linux/personality.h> @@ -455,8 +456,19 @@ error_kill: } /*****************************************************************************/ + +#ifndef ELF_BASE_PLATFORM /* - * present useful information to the program + * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. + * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value + * will be copied to the user stack in the same manner as AT_PLATFORM. + */ +#define ELF_BASE_PLATFORM NULL +#endif + +/* + * present useful information to the program by shovelling it onto the new + * process's stack */ static int create_elf_fdpic_tables(struct linux_binprm *bprm, struct mm_struct *mm, @@ -466,15 +478,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, unsigned long sp, csp, nitems; elf_caddr_t __user *argv, *envp; size_t platform_len = 0, len; - char *k_platform; - char __user *u_platform, *p; + char *k_platform, *k_base_platform; + char __user *u_platform, *u_base_platform, *p; long hwcap; int loop; int nr; /* reset for each csp adjustment */ - /* we're going to shovel a whole load of stuff onto the stack */ #ifdef CONFIG_MMU - sp = bprm->p; + /* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions + * by the processes running on the same package. One thing we can do is + * to shuffle the initial stack for them, so we give the architecture + * an opportunity to do so here. + */ + sp = arch_align_stack(bprm->p); #else sp = mm->start_stack; @@ -483,11 +499,14 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, return -EFAULT; #endif - /* get hold of platform and hardware capabilities masks for the machine - * we are running on. In some cases (Sparc), this info is impossible - * to get, in others (i386) it is merely difficult. - */ hwcap = ELF_HWCAP; + + /* + * If this architecture has a platform capability string, copy it + * to userspace. In some cases (Sparc), this info is impossible + * for userspace to get any other way, in others (i386) it is + * merely difficult. + */ k_platform = ELF_PLATFORM; u_platform = NULL; @@ -499,19 +518,20 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, return -EFAULT; } -#if defined(__i386__) && defined(CONFIG_SMP) - /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions - * by the processes running on the same package. One thing we can do is - * to shuffle the initial stack for them. - * - * the conditionals here are unneeded, but kept in to make the code - * behaviour the same as pre change unless we have hyperthreaded - * processors. This keeps Mr Marcelo Person happier but should be - * removed for 2.5 + /* + * If this architecture has a "base" platform capability + * string, copy it to userspace. */ - if (smp_num_siblings > 1) - sp = sp - ((current->pid % 64) << 7); -#endif + k_base_platform = ELF_BASE_PLATFORM; + u_base_platform = NULL; + + if (k_base_platform) { + platform_len = strlen(k_base_platform) + 1; + sp -= platform_len; + u_base_platform = (char __user *) sp; + if (__copy_to_user(u_base_platform, k_base_platform, platform_len) != 0) + return -EFAULT; + } sp &= ~7UL; @@ -541,9 +561,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, } /* force 16 byte _final_ alignment here for generality */ -#define DLINFO_ITEMS 13 +#define DLINFO_ITEMS 15 + + nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + + (k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; - nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; + if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) + nitems++; csp = sp; sp -= nitems * 2 * sizeof(unsigned long); @@ -575,6 +599,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, (elf_addr_t) (unsigned long) u_platform); } + if (k_base_platform) { + nr = 0; + csp -= 2 * sizeof(unsigned long); + NEW_AUX_ENT(AT_BASE_PLATFORM, + (elf_addr_t) (unsigned long) u_base_platform); + } + + if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { + nr = 0; + csp -= 2 * sizeof(unsigned long); + NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); + } + nr = 0; csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); NEW_AUX_ENT(AT_HWCAP, hwcap); @@ -590,6 +627,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); + NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); + NEW_AUX_ENT(AT_EXECFN, bprm->exec); #ifdef ARCH_DLINFO nr = 0; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f9c88d0c8ce..32fb00b52cd 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ + bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index dfc0197905c..ccb781a6a80 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -229,13 +229,13 @@ static int decompress_exec( ret = 10; if (buf[3] & EXTRA_FIELD) { ret += 2 + buf[10] + (buf[11] << 8); - if (unlikely(LBUFSIZE == ret)) { + if (unlikely(LBUFSIZE <= ret)) { DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); goto out_free_buf; } } if (buf[3] & ORIG_NAME) { - for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) + while (ret < LBUFSIZE && buf[ret++] != 0) ; if (unlikely(LBUFSIZE == ret)) { DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); @@ -243,7 +243,7 @@ static int decompress_exec( } } if (buf[3] & COMMENT) { - for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) + while (ret < LBUFSIZE && buf[ret++] != 0) ; if (unlikely(LBUFSIZE == ret)) { DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 8d7e88e02e0..f2744ab4e5b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -117,7 +117,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _ret; retval = -ENOEXEC; - if (bprm->misc_bang) + if (bprm->recursion_depth > BINPRM_MAX_RECURSION) goto _ret; /* to keep locking time low, we copy the interpreter string */ @@ -197,7 +197,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; - bprm->misc_bang = 1; + bprm->recursion_depth++; retval = search_binary_handler (bprm, regs); if (retval < 0) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 9e3963f7ebf..08343505e18 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -22,14 +22,15 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) char interp[BINPRM_BUF_SIZE]; int retval; - if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || (bprm->sh_bang)) + if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || + (bprm->recursion_depth > BINPRM_MAX_RECURSION)) return -ENOEXEC; /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. -TYT */ - bprm->sh_bang = 1; + bprm->recursion_depth++; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 68be580ba28..74e587a5279 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -306,3 +306,5 @@ static void __exit exit_som_binfmt(void) core_initcall(init_som_binfmt); module_exit(exit_som_binfmt); + +MODULE_LICENSE("GPL"); diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cb7cda3d78..262fa10e213 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -22,9 +22,6 @@ #include <linux/mutex.h> #include <linux/backing-dev.h> -#ifdef CONFIG_KMOD -#include <linux/kmod.h> -#endif #include "internal.h" /* diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 0d9b80ec689..cfd29da714d 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -362,9 +362,8 @@ static int init_coda_psdev(void) goto out_chrdev; } for (i = 0; i < MAX_CODADEVS; i++) - device_create_drvdata(coda_psdev_class, NULL, - MKDEV(CODA_PSDEV_MAJOR, i), - NULL, "cfs%d", i); + device_create(coda_psdev_class, NULL, + MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); coda_sysctl_init(); goto out; diff --git a/fs/compat.c b/fs/compat.c index 075d0509970..5f9ec449c79 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _ return compat_sys_futimesat(AT_FDCWD, filename, t); } +static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) +{ + compat_ino_t ino = stat->ino; + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + int err; + + SET_UID(uid, stat->uid); + SET_GID(gid, stat->gid); + + if ((u64) stat->size > MAX_NON_LFS || + !old_valid_dev(stat->dev) || + !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) + return -EOVERFLOW; + + if (clear_user(ubuf, sizeof(*ubuf))) + return -EFAULT; + + err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); + err |= __put_user(ino, &ubuf->st_ino); + err |= __put_user(stat->mode, &ubuf->st_mode); + err |= __put_user(stat->nlink, &ubuf->st_nlink); + err |= __put_user(uid, &ubuf->st_uid); + err |= __put_user(gid, &ubuf->st_gid); + err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); + err |= __put_user(stat->size, &ubuf->st_size); + err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); + err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); + err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); + err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); + err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); + err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); + err |= __put_user(stat->blksize, &ubuf->st_blksize); + err |= __put_user(stat->blocks, &ubuf->st_blocks); + return err; +} + asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { @@ -1239,7 +1278,7 @@ static int compat_count(compat_uptr_t __user *argv, int max) if (!p) break; argv++; - if(++i > max) + if (i++ >= max) return -E2BIG; } } diff --git a/fs/direct-io.c b/fs/direct-io.c index 9606ee848fd..af0558dbe8b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -5,11 +5,11 @@ * * O_DIRECT * - * 04Jul2002 akpm@zip.com.au + * 04Jul2002 Andrew Morton * Initial version * 11Sep2002 janetinc@us.ibm.com * added readv/writev support. - * 29Oct2002 akpm@zip.com.au + * 29Oct2002 Andrew Morton * rewrote bio_add_page() support. * 30Oct2002 pbadari@us.ibm.com * added support for non-aligned IO. diff --git a/fs/dquot.c b/fs/dquot.c index ad7e59003e0..da30a27f224 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -9,8 +9,6 @@ * implementation is based on one of the several variants of the LINUX * inode-subsystem with added complexity of the diskquota system. * - * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $ - * * Author: Marco van Wieringen <mvw@planets.elm.net> * * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index b4755a85996..2cc9ee4ad2e 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b73fb752c5f..3504cf9df35 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -79,11 +79,6 @@ #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 #define ECRYPTFS_DEFAULT_NUM_USERS 4 #define ECRYPTFS_MAX_NUM_USERS 32768 -#define ECRYPTFS_TRANSPORT_NETLINK 0 -#define ECRYPTFS_TRANSPORT_CONNECTOR 1 -#define ECRYPTFS_TRANSPORT_RELAYFS 2 -#define ECRYPTFS_TRANSPORT_MISCDEV 3 -#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV #define ECRYPTFS_XATTR_NAME "user.ecryptfs" #define RFC2440_CIPHER_DES3_EDE 0x02 @@ -400,8 +395,6 @@ struct ecryptfs_msg_ctx { struct mutex mux; }; -extern unsigned int ecryptfs_transport; - struct ecryptfs_daemon; struct ecryptfs_daemon { @@ -627,31 +620,20 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(unsigned int transport, uid_t euid, - struct user_namespace *user_ns, struct pid *pid); +int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, + struct pid *pid); int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, struct pid *pid); int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, struct user_namespace *user_ns, struct pid *pid, u32 seq); -int ecryptfs_send_message(unsigned int transport, char *data, int data_len, +int ecryptfs_send_message(char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **emsg); -int ecryptfs_init_messaging(unsigned int transport); -void ecryptfs_release_messaging(unsigned int transport); +int ecryptfs_init_messaging(void); +void ecryptfs_release_messaging(void); -int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, struct pid *daemon_pid); -int ecryptfs_init_netlink(void); -void ecryptfs_release_netlink(void); - -int ecryptfs_send_connector(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, struct pid *daemon_pid); -int ecryptfs_init_connector(void); -void ecryptfs_release_connector(void); void ecryptfs_write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9244d653743..eb3dc4c7ac0 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -71,12 +71,11 @@ struct ecryptfs_getdents_callback { void *dirent; struct dentry *dentry; filldir_t filldir; - int err; int filldir_called; int entries_written; }; -/* Inspired by generic filldir in fs/readir.c */ +/* Inspired by generic filldir in fs/readdir.c */ static int ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) @@ -125,18 +124,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) buf.dirent = dirent; buf.dentry = file->f_path.dentry; buf.filldir = filldir; -retry: buf.filldir_called = 0; buf.entries_written = 0; - buf.err = 0; rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); - if (buf.err) - rc = buf.err; - if (buf.filldir_called && !buf.entries_written) - goto retry; file->f_pos = lower_file->f_pos; + if (rc < 0) + goto out; + if (buf.filldir_called && !buf.entries_written) + goto out; if (rc >= 0) - fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_atime(inode, + lower_file->f_path.dentry->d_inode); +out: return rc; } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index f5b76a331b9..e22bc396134 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -234,8 +234,8 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, } i += data_len; if (message_len < (i + m_size)) { - ecryptfs_printk(KERN_ERR, "The received netlink message is " - "shorter than expected\n"); + ecryptfs_printk(KERN_ERR, "The message received from ecryptfsd " + "is shorter than expected\n"); rc = -EIO; goto out; } @@ -438,8 +438,8 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *netlink_message; - size_t netlink_message_length; + char *payload; + size_t payload_len; int rc; rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); @@ -449,15 +449,15 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, goto out; } rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), - &netlink_message, &netlink_message_length); + &payload, &payload_len); if (rc) { ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); goto out; } - rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, - netlink_message_length, &msg_ctx); + rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { - ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); + ecryptfs_printk(KERN_ERR, "Error sending message to " + "ecryptfsd\n"); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); @@ -1333,23 +1333,22 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_key_record *key_rec) { struct ecryptfs_msg_ctx *msg_ctx = NULL; - char *netlink_payload; - size_t netlink_payload_length; + char *payload = NULL; + size_t payload_len; struct ecryptfs_message *msg; int rc; rc = write_tag_66_packet(auth_tok->token.private_key.signature, ecryptfs_code_for_cipher_string(crypt_stat), - crypt_stat, &netlink_payload, - &netlink_payload_length); + crypt_stat, &payload, &payload_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); goto out; } - rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload, - netlink_payload_length, &msg_ctx); + rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { - ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); + ecryptfs_printk(KERN_ERR, "Error sending message to " + "ecryptfsd\n"); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); @@ -1364,8 +1363,7 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); kfree(msg); out: - if (netlink_payload) - kfree(netlink_payload); + kfree(payload); return rc; } /** diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 8ebe9a5d1d9..046e027a4cb 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -30,7 +30,6 @@ #include <linux/namei.h> #include <linux/skbuff.h> #include <linux/crypto.h> -#include <linux/netlink.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/key.h> @@ -49,8 +48,7 @@ MODULE_PARM_DESC(ecryptfs_verbosity, "0, which is Quiet)"); /** - * Module parameter that defines the number of netlink message buffer - * elements + * Module parameter that defines the number of message buffer elements */ unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; @@ -60,9 +58,9 @@ MODULE_PARM_DESC(ecryptfs_message_buf_len, /** * Module parameter that defines the maximum guaranteed amount of time to wait - * for a response through netlink. The actual sleep time will be, more than + * for a response from ecryptfsd. The actual sleep time will be, more than * likely, a small amount greater than this specified value, but only less if - * the netlink message successfully arrives. + * the message successfully arrives. */ signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; @@ -83,8 +81,6 @@ module_param(ecryptfs_number_of_users, uint, 0); MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " "concurrent users of eCryptfs"); -unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT; - void __ecryptfs_printk(const char *fmt, ...) { va_list args; @@ -779,10 +775,11 @@ static int __init ecryptfs_init(void) "rc = [%d]\n", __func__, rc); goto out_do_sysfs_unregistration; } - rc = ecryptfs_init_messaging(ecryptfs_transport); + rc = ecryptfs_init_messaging(); if (rc) { printk(KERN_ERR "Failure occured while attempting to " - "initialize the eCryptfs netlink socket\n"); + "initialize the communications channel to " + "ecryptfsd\n"); goto out_destroy_kthread; } rc = ecryptfs_init_crypto(); @@ -797,7 +794,7 @@ static int __init ecryptfs_init(void) goto out; out_release_messaging: - ecryptfs_release_messaging(ecryptfs_transport); + ecryptfs_release_messaging(); out_destroy_kthread: ecryptfs_destroy_kthread(); out_do_sysfs_unregistration: @@ -818,7 +815,7 @@ static void __exit ecryptfs_exit(void) if (rc) printk(KERN_ERR "Failure whilst attempting to destroy crypto; " "rc = [%d]\n", rc); - ecryptfs_release_messaging(ecryptfs_transport); + ecryptfs_release_messaging(); ecryptfs_destroy_kthread(); do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 1b5c20058ac..c6983978a31 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -134,12 +134,11 @@ out: } static int -ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, - u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); +ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, + struct ecryptfs_msg_ctx **msg_ctx); /** * ecryptfs_send_raw_message - * @transport: Transport type * @msg_type: Message type * @daemon: Daemon struct for recipient of message * @@ -150,38 +149,25 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, +static int ecryptfs_send_raw_message(u8 msg_type, struct ecryptfs_daemon *daemon) { struct ecryptfs_msg_ctx *msg_ctx; int rc; - switch(transport) { - case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, - daemon->pid); - break; - case ECRYPTFS_TRANSPORT_MISCDEV: - rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, - &msg_ctx); - if (rc) { - printk(KERN_ERR "%s: Error whilst attempting to send " - "message via procfs; rc = [%d]\n", __func__, rc); - goto out; - } - /* Raw messages are logically context-free (e.g., no - * reply is expected), so we set the state of the - * ecryptfs_msg_ctx object to indicate that it should - * be freed as soon as the transport sends out the message. */ - mutex_lock(&msg_ctx->mux); - msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; - mutex_unlock(&msg_ctx->mux); - break; - case ECRYPTFS_TRANSPORT_CONNECTOR: - case ECRYPTFS_TRANSPORT_RELAYFS: - default: - rc = -ENOSYS; + rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx); + if (rc) { + printk(KERN_ERR "%s: Error whilst attempting to send " + "message to ecryptfsd; rc = [%d]\n", __func__, rc); + goto out; } + /* Raw messages are logically context-free (e.g., no + * reply is expected), so we set the state of the + * ecryptfs_msg_ctx object to indicate that it should + * be freed as soon as the message is sent. */ + mutex_lock(&msg_ctx->mux); + msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; + mutex_unlock(&msg_ctx->mux); out: return rc; } @@ -227,7 +213,6 @@ out: /** * ecryptfs_process_helo - * @transport: The underlying transport (netlink, etc.) * @euid: The user ID owner of the message * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the @@ -239,8 +224,8 @@ out: * Returns zero after adding a new daemon to the hash list; * non-zero otherwise. */ -int ecryptfs_process_helo(unsigned int transport, uid_t euid, - struct user_namespace *user_ns, struct pid *pid) +int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { struct ecryptfs_daemon *new_daemon; struct ecryptfs_daemon *old_daemon; @@ -252,8 +237,7 @@ int ecryptfs_process_helo(unsigned int transport, uid_t euid, printk(KERN_WARNING "Received request from user [%d] " "to register daemon [0x%p]; unregistering daemon " "[0x%p]\n", euid, pid, old_daemon->pid); - rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, - old_daemon); + rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon); if (rc) printk(KERN_WARNING "Failed to send QUIT " "message to daemon [0x%p]; rc = [%d]\n", @@ -467,8 +451,6 @@ out: /** * ecryptfs_send_message_locked - * @transport: The transport over which to send the message (i.e., - * netlink) * @data: The data to send * @data_len: The length of data * @msg_ctx: The message context allocated for the send @@ -478,8 +460,8 @@ out: * Returns zero on success; non-zero otherwise */ static int -ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, - u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) +ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, + struct ecryptfs_msg_ctx **msg_ctx) { struct ecryptfs_daemon *daemon; int rc; @@ -503,20 +485,8 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); mutex_unlock(&(*msg_ctx)->mux); mutex_unlock(&ecryptfs_msg_ctx_lists_mux); - switch (transport) { - case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, - 0, daemon->pid); - break; - case ECRYPTFS_TRANSPORT_MISCDEV: - rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, - 0, daemon); - break; - case ECRYPTFS_TRANSPORT_CONNECTOR: - case ECRYPTFS_TRANSPORT_RELAYFS: - default: - rc = -ENOSYS; - } + rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, 0, + daemon); if (rc) printk(KERN_ERR "%s: Error attempting to send message to " "userspace daemon; rc = [%d]\n", __func__, rc); @@ -526,8 +496,6 @@ out: /** * ecryptfs_send_message - * @transport: The transport over which to send the message (i.e., - * netlink) * @data: The data to send * @data_len: The length of data * @msg_ctx: The message context allocated for the send @@ -536,14 +504,14 @@ out: * * Returns zero on success; non-zero otherwise */ -int ecryptfs_send_message(unsigned int transport, char *data, int data_len, +int ecryptfs_send_message(char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx) { int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_send_message_locked(transport, data, data_len, - ECRYPTFS_MSG_REQUEST, msg_ctx); + rc = ecryptfs_send_message_locked(data, data_len, ECRYPTFS_MSG_REQUEST, + msg_ctx); mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } @@ -586,7 +554,7 @@ sleep: return rc; } -int ecryptfs_init_messaging(unsigned int transport) +int ecryptfs_init_messaging(void) { int i; int rc = 0; @@ -639,27 +607,14 @@ int ecryptfs_init_messaging(unsigned int transport) mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); } mutex_unlock(&ecryptfs_msg_ctx_lists_mux); - switch(transport) { - case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_init_netlink(); - if (rc) - ecryptfs_release_messaging(transport); - break; - case ECRYPTFS_TRANSPORT_MISCDEV: - rc = ecryptfs_init_ecryptfs_miscdev(); - if (rc) - ecryptfs_release_messaging(transport); - break; - case ECRYPTFS_TRANSPORT_CONNECTOR: - case ECRYPTFS_TRANSPORT_RELAYFS: - default: - rc = -ENOSYS; - } + rc = ecryptfs_init_ecryptfs_miscdev(); + if (rc) + ecryptfs_release_messaging(); out: return rc; } -void ecryptfs_release_messaging(unsigned int transport) +void ecryptfs_release_messaging(void) { if (ecryptfs_msg_ctx_arr) { int i; @@ -698,17 +653,6 @@ void ecryptfs_release_messaging(unsigned int transport) kfree(ecryptfs_daemon_hash); mutex_unlock(&ecryptfs_daemon_hash_mux); } - switch(transport) { - case ECRYPTFS_TRANSPORT_NETLINK: - ecryptfs_release_netlink(); - break; - case ECRYPTFS_TRANSPORT_MISCDEV: - ecryptfs_destroy_ecryptfs_miscdev(); - break; - case ECRYPTFS_TRANSPORT_CONNECTOR: - case ECRYPTFS_TRANSPORT_RELAYFS: - default: - break; - } + ecryptfs_destroy_ecryptfs_miscdev(); return; } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 245c2dc02d5..04d7b3fa1ac 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -265,22 +265,34 @@ out: } /** - * ecryptfs_prepare_write + * ecryptfs_write_begin * @file: The eCryptfs file - * @page: The eCryptfs page - * @from: The start byte from which we will write - * @to: The end byte to which we will write + * @mapping: The eCryptfs object + * @pos: The file offset at which to start writing + * @len: Length of the write + * @flags: Various flags + * @pagep: Pointer to return the page + * @fsdata: Pointer to return fs data (unused) * * This function must zero any hole we create * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ecryptfs_write_begin(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct page *page; loff_t prev_page_end_size; int rc = 0; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + if (!PageUptodate(page)) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private( @@ -289,8 +301,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { rc = ecryptfs_read_lower_page_segment( - page, page->index, 0, PAGE_CACHE_SIZE, - page->mapping->host); + page, index, 0, PAGE_CACHE_SIZE, mapping->host); if (rc) { printk(KERN_ERR "%s: Error attemping to read " "lower page segment; rc = [%d]\n", @@ -316,8 +327,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, SetPageUptodate(page); } else { rc = ecryptfs_read_lower_page_segment( - page, page->index, 0, PAGE_CACHE_SIZE, - page->mapping->host); + page, index, 0, PAGE_CACHE_SIZE, + mapping->host); if (rc) { printk(KERN_ERR "%s: Error reading " "page; rc = [%d]\n", @@ -339,10 +350,10 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, SetPageUptodate(page); } } - prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); + prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); /* If creating a page or more of holes, zero them out via truncate. * Note, this will increase i_size. */ - if (page->index != 0) { + if (index != 0) { if (prev_page_end_size > i_size_read(page->mapping->host)) { rc = ecryptfs_truncate(file->f_path.dentry, prev_page_end_size); @@ -357,8 +368,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, } /* Writing to a new page, and creating a small hole from start * of page? Zero it out. */ - if ((i_size_read(page->mapping->host) == prev_page_end_size) - && (from != 0)) + if ((i_size_read(mapping->host) == prev_page_end_size) + && (pos != 0)) zero_user(page, 0, PAGE_CACHE_SIZE); out: return rc; @@ -445,21 +456,28 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) } /** - * ecryptfs_commit_write + * ecryptfs_write_end * @file: The eCryptfs file object + * @mapping: The eCryptfs object + * @pos: The file position + * @len: The length of the data (unused) + * @copied: The amount of data copied * @page: The eCryptfs page - * @from: Ignored (we rotate the page IV on each write) - * @to: Ignored + * @fsdata: The fsdata (unused) * * This is where we encrypt the data and pass the encrypted data to * the lower filesystem. In OpenPGP-compatible mode, we operate on * entire underlying packets. */ -static int ecryptfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ecryptfs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - loff_t pos; - struct inode *ecryptfs_inode = page->mapping->host; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + copied; + struct inode *ecryptfs_inode = mapping->host; struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; int rc; @@ -471,25 +489,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, } else ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" - "(page w/ index = [0x%.16x], to = [%d])\n", page->index, - to); + "(page w/ index = [0x%.16x], to = [%d])\n", index, to); /* Fills in zeros if 'to' goes beyond inode size */ rc = fill_zeros_to_end_of_page(page, to); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to fill " - "zeros in page with index = [0x%.16x]\n", - page->index); + "zeros in page with index = [0x%.16x]\n", index); goto out; } rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " - "index [0x%.16x])\n", page->index); + "index [0x%.16x])\n", index); goto out; } - pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to; - if (pos > i_size_read(ecryptfs_inode)) { - i_size_write(ecryptfs_inode, pos); + if (pos + copied > i_size_read(ecryptfs_inode)) { + i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " "[0x%.16x]\n", i_size_read(ecryptfs_inode)); } @@ -497,7 +512,11 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, if (rc) printk(KERN_ERR "Error writing inode size to metadata; " "rc = [%d]\n", rc); + else + rc = copied; out: + unlock_page(page); + page_cache_release(page); return rc; } @@ -518,7 +537,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, - .prepare_write = ecryptfs_prepare_write, - .commit_write = ecryptfs_commit_write, + .write_begin = ecryptfs_write_begin, + .write_end = ecryptfs_write_end, .bmap = ecryptfs_bmap, }; diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c deleted file mode 100644 index e0abad62b39..00000000000 --- a/fs/ecryptfs/netlink.c +++ /dev/null @@ -1,249 +0,0 @@ -/** - * eCryptfs: Linux filesystem encryption layer - * - * Copyright (C) 2004-2006 International Business Machines Corp. - * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> - * Tyler Hicks <tyhicks@ou.edu> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. - */ - -#include <net/sock.h> -#include <linux/hash.h> -#include <linux/random.h> -#include "ecryptfs_kernel.h" - -static struct sock *ecryptfs_nl_sock; - -/** - * ecryptfs_send_netlink - * @data: The data to include as the payload - * @data_len: The byte count of the data - * @msg_ctx: The netlink context that will be used to handle the - * response message - * @msg_type: The type of netlink message to send - * @msg_flags: The flags to include in the netlink header - * @daemon_pid: The process id of the daemon to send the message to - * - * Sends the data to the specified daemon pid and uses the netlink - * context element to store the data needed for validation upon - * receiving the response. The data and the netlink context can be - * null if just sending a netlink header is sufficient. Returns zero - * upon sending the message; non-zero upon error. - */ -int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, - u16 msg_flags, struct pid *daemon_pid) -{ - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct ecryptfs_message *msg; - size_t payload_len; - int rc; - - payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0); - skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL); - if (!skb) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); - goto out; - } - nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0, - msg_type, payload_len); - nlh->nlmsg_flags = msg_flags; - if (msg_ctx && payload_len) { - msg = (struct ecryptfs_message *)NLMSG_DATA(nlh); - msg->index = msg_ctx->index; - msg->data_len = data_len; - memcpy(msg->data, data, data_len); - } - rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " - "message; rc = [%d]\n", rc); - goto out; - } - rc = 0; - goto out; -nlmsg_failure: - rc = -EMSGSIZE; - kfree_skb(skb); -out: - return rc; -} - -/** - * ecryptfs_process_nl_reponse - * @skb: The socket buffer containing the netlink message of state - * RESPONSE - * - * Processes a response message after sending a operation request to - * userspace. Attempts to assign the msg to a netlink context element - * at the index specified in the msg. The sk_buff and nlmsghdr must - * be validated before this function. Returns zero upon delivery to - * desired context element; non-zero upon delivery failure or error. - */ -static int ecryptfs_process_nl_response(struct sk_buff *skb) -{ - struct nlmsghdr *nlh = nlmsg_hdr(skb); - struct ecryptfs_message *msg = NLMSG_DATA(nlh); - struct pid *pid; - int rc; - - if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Received netlink message with " - "incorrectly specified data length\n"); - goto out; - } - pid = find_get_pid(NETLINK_CREDS(skb)->pid); - rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, - pid, nlh->nlmsg_seq); - put_pid(pid); - if (rc) - printk(KERN_ERR - "Error processing response message; rc = [%d]\n", rc); -out: - return rc; -} - -/** - * ecryptfs_process_nl_helo - * @skb: The socket buffer containing the nlmsghdr in HELO state - * - * Gets uid and pid of the skb and adds the values to the daemon id - * hash. Returns zero after adding a new daemon id to the hash list; - * non-zero otherwise. - */ -static int ecryptfs_process_nl_helo(struct sk_buff *skb) -{ - struct pid *pid; - int rc; - - pid = find_get_pid(NETLINK_CREDS(skb)->pid); - rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, - NETLINK_CREDS(skb)->uid, NULL, pid); - put_pid(pid); - if (rc) - printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); - return rc; -} - -/** - * ecryptfs_process_nl_quit - * @skb: The socket buffer containing the nlmsghdr in QUIT state - * - * Gets uid and pid of the skb and deletes the corresponding daemon - * id, if it is the registered that is requesting the - * deletion. Returns zero after deleting the desired daemon id; - * non-zero otherwise. - */ -static int ecryptfs_process_nl_quit(struct sk_buff *skb) -{ - struct pid *pid; - int rc; - - pid = find_get_pid(NETLINK_CREDS(skb)->pid); - rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); - put_pid(pid); - if (rc) - printk(KERN_WARNING - "Error processing QUIT message; rc = [%d]\n", rc); - return rc; -} - -/** - * ecryptfs_receive_nl_message - * - * Callback function called by netlink system when a message arrives. - * If the message looks to be valid, then an attempt is made to assign - * it to its desired netlink context element and wake up the process - * that is waiting for a response. - */ -static void ecryptfs_receive_nl_message(struct sk_buff *skb) -{ - struct nlmsghdr *nlh; - - nlh = nlmsg_hdr(skb); - if (!NLMSG_OK(nlh, skb->len)) { - ecryptfs_printk(KERN_ERR, "Received corrupt netlink " - "message\n"); - goto free; - } - switch (nlh->nlmsg_type) { - case ECRYPTFS_MSG_RESPONSE: - if (ecryptfs_process_nl_response(skb)) { - ecryptfs_printk(KERN_WARNING, "Failed to " - "deliver netlink response to " - "requesting operation\n"); - } - break; - case ECRYPTFS_MSG_HELO: - if (ecryptfs_process_nl_helo(skb)) { - ecryptfs_printk(KERN_WARNING, "Failed to " - "fulfill HELO request\n"); - } - break; - case ECRYPTFS_MSG_QUIT: - if (ecryptfs_process_nl_quit(skb)) { - ecryptfs_printk(KERN_WARNING, "Failed to " - "fulfill QUIT request\n"); - } - break; - default: - ecryptfs_printk(KERN_WARNING, "Dropping netlink " - "message of unrecognized type [%d]\n", - nlh->nlmsg_type); - break; - } -free: - kfree_skb(skb); -} - -/** - * ecryptfs_init_netlink - * - * Initializes the daemon id hash list, netlink context array, and - * necessary locks. Returns zero upon success; non-zero upon error. - */ -int ecryptfs_init_netlink(void) -{ - int rc; - - ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0, - ecryptfs_receive_nl_message, - NULL, THIS_MODULE); - if (!ecryptfs_nl_sock) { - rc = -EIO; - ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n"); - goto out; - } - ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT; - rc = 0; -out: - return rc; -} - -/** - * ecryptfs_release_netlink - * - * Frees all memory used by the netlink context array and releases the - * netlink socket. - */ -void ecryptfs_release_netlink(void) -{ - netlink_kernel_release(ecryptfs_nl_sock); - ecryptfs_nl_sock = NULL; -} diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7cc0eb756b5..99368bda026 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -927,14 +927,11 @@ errxit: /* * During the time we spent in the loop above, some other events * might have been queued by the poll callback. We re-insert them - * here (in case they are not already queued, or they're one-shot). + * inside the main ready-list here. */ for (nepi = ep->ovflist; (epi = nepi) != NULL; - nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { - if (!ep_is_linked(&epi->rdllink) && - (epi->event.events & ~EP_PRIVATE_BITS)) - list_add_tail(&epi->rdllink, &ep->rdllist); - } + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) + list_add_tail(&epi->rdllink, &ep->rdllist); /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after * releasing the lock, events will be queued in the normal way inside diff --git a/fs/exec.c b/fs/exec.c index cecee501ce7..a41e7902ed0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -50,15 +50,12 @@ #include <linux/cn_proc.h> #include <linux/audit.h> #include <linux/tracehook.h> +#include <linux/kmod.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/tlb.h> -#ifdef CONFIG_KMOD -#include <linux/kmod.h> -#endif - #ifdef __alpha__ /* for /sbin/loader handling in search_binary_handler() */ #include <linux/a.out.h> @@ -391,7 +388,7 @@ static int count(char __user * __user * argv, int max) if (!p) break; argv++; - if(++i > max) + if (i++ >= max) return -E2BIG; cond_resched(); } @@ -825,8 +822,6 @@ static int de_thread(struct task_struct *tsk) schedule(); } - if (unlikely(task_child_reaper(tsk) == leader)) - task_active_pid_ns(tsk)->child_reaper = tsk; /* * The only record we have of the real-time age of a * process, regardless of execs it's done, is start_time. @@ -1189,7 +1184,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) return retval; /* Remember if the application is TASO. */ - bprm->sh_bang = eh->ah.entry < 0x100000000UL; + bprm->taso = eh->ah.entry < 0x100000000UL; bprm->file = file; bprm->loader = loader; @@ -1247,8 +1242,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) read_unlock(&binfmt_lock); if (retval != -ENOEXEC || bprm->mm == NULL) { break; -#ifdef CONFIG_KMOD - }else{ +#ifdef CONFIG_MODULES + } else { #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 10bb02c3f25..6dac7ba2d22 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1295,6 +1295,7 @@ retry_alloc: * turn off reservation for this allocation */ if (my_rsv && (free_blocks < windowsz) + && (free_blocks > 0) && (rsv_is_empty(&my_rsv->rsv_window))) my_rsv = NULL; @@ -1332,7 +1333,7 @@ retry_alloc: * free blocks is less than half of the reservation * window size. */ - if (free_blocks <= (windowsz/2)) + if (my_rsv && (free_blocks <= (windowsz/2))) continue; brelse(bitmap_bh); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a78c6b4af06..11a49ce8439 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -103,7 +103,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static void ext2_check_page(struct page *page) +static void ext2_check_page(struct page *page, int quiet) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; @@ -146,10 +146,10 @@ out: /* Too bad, we had an error */ Ebadsize: - ext2_error(sb, "ext2_check_page", - "size of directory #%lu is not a multiple of chunk size", - dir->i_ino - ); + if (!quiet) + ext2_error(sb, __func__, + "size of directory #%lu is not a multiple " + "of chunk size", dir->i_ino); goto fail; Eshort: error = "rec_len is smaller than minimal"; @@ -166,32 +166,36 @@ Espan: Einumber: error = "inode out of bounds"; bad_entry: - ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le32_to_cpu(p->inode), - rec_len, p->name_len); + if (!quiet) + ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode), + rec_len, p->name_len); goto fail; Eend: - p = (ext2_dirent *)(kaddr + offs); - ext2_error (sb, "ext2_check_page", - "entry in directory #%lu spans the page boundary" - "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le32_to_cpu(p->inode)); + if (!quiet) { + p = (ext2_dirent *)(kaddr + offs); + ext2_error(sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode)); + } fail: SetPageChecked(page); SetPageError(page); } -static struct page * ext2_get_page(struct inode *dir, unsigned long n) +static struct page * ext2_get_page(struct inode *dir, unsigned long n, + int quiet) { struct address_space *mapping = dir->i_mapping; struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { kmap(page); if (!PageChecked(page)) - ext2_check_page(page); + ext2_check_page(page, quiet); if (PageError(page)) goto fail; } @@ -292,7 +296,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; ext2_dirent *de; - struct page *page = ext2_get_page(inode, n); + struct page *page = ext2_get_page(inode, n, 0); if (IS_ERR(page)) { ext2_error(sb, __func__, @@ -361,6 +365,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, struct page *page = NULL; struct ext2_inode_info *ei = EXT2_I(dir); ext2_dirent * de; + int dir_has_error = 0; if (npages == 0) goto out; @@ -374,7 +379,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, n = start; do { char *kaddr; - page = ext2_get_page(dir, n); + page = ext2_get_page(dir, n, dir_has_error); if (!IS_ERR(page)) { kaddr = page_address(page); de = (ext2_dirent *) kaddr; @@ -391,7 +396,9 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, de = ext2_next_entry(de); } ext2_put_page(page); - } + } else + dir_has_error = 1; + if (++n >= npages) n = 0; /* next page is past the blocks we've got */ @@ -414,7 +421,7 @@ found: struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) { - struct page *page = ext2_get_page(dir, 0); + struct page *page = ext2_get_page(dir, 0, 0); ext2_dirent *de = NULL; if (!IS_ERR(page)) { @@ -487,7 +494,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) for (n = 0; n <= npages; n++) { char *dir_end; - page = ext2_get_page(dir, n); + page = ext2_get_page(dir, n, 0); err = PTR_ERR(page); if (IS_ERR(page)) goto out; @@ -655,14 +662,17 @@ int ext2_empty_dir (struct inode * inode) { struct page *page = NULL; unsigned long i, npages = dir_pages(inode); + int dir_has_error = 0; for (i = 0; i < npages; i++) { char *kaddr; ext2_dirent * de; - page = ext2_get_page(inode, i); + page = ext2_get_page(inode, i, dir_has_error); - if (IS_ERR(page)) + if (IS_ERR(page)) { + dir_has_error = 1; continue; + } kaddr = page_address(page); de = (ext2_dirent *)kaddr; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 25adfc3c693..d0ff0b8cf30 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -8,7 +8,7 @@ * pages against inodes. ie: data writeback. Writeout of the * inode itself is not handled here. * - * 10Apr2002 akpm@zip.com.au + * 10Apr2002 Andrew Morton * Split out of fs/inode.c * Additions for address_space-based writeback */ diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index ba851576ebb..6d98f116ca0 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -190,6 +190,10 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, fd->search_key->cat.ParID = rec.thread.ParID; len = fd->search_key->cat.CName.len = rec.thread.CName.len; + if (len > HFS_NAMELEN) { + printk(KERN_ERR "hfs: bad catalog namelength\n"); + return -EIO; + } memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); return hfs_brec_find(fd); } diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index d128a25b74d..ea30afc2a03 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -32,6 +32,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); + if (IS_ERR(page)) { + start = size; + goto out; + } pptr = kmap(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; i = offset % 32; @@ -73,6 +77,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma break; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); + if (IS_ERR(page)) { + start = size; + goto out; + } curr = pptr = kmap(page); if ((size ^ offset) / PAGE_CACHE_BITS) end = pptr + PAGE_CACHE_BITS / 32; @@ -120,6 +128,10 @@ found: offset += PAGE_CACHE_BITS; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); + if (IS_ERR(page)) { + start = size; + goto out; + } pptr = kmap(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index ba117c445e7..f6874acb2cf 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -168,6 +168,11 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return -EIO; } + if (be16_to_cpu(tmp.thread.nodeName.length) > 255) { + printk(KERN_ERR "hfs: catalog name length corrupted\n"); + return -EIO; + } + hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), &tmp.thread.nodeName); return hfs_brec_find(fd); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e834e578c93..eb74531a0a8 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -356,7 +356,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; - } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { + } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " "use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; diff --git a/fs/mpage.c b/fs/mpage.c index dbcc7af76a1..552b80b3fac 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -6,7 +6,7 @@ * Contains functions related to preparing and submitting BIOs which contain * multiple pagecache pages. * - * 15May2002 akpm@zip.com.au + * 15May2002 Andrew Morton * Initial version * 27Jun2002 axboe@suse.de * use bio_add_page() to build bio's just the right size diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 64965e1c21c..9b0efdad891 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -13,9 +13,7 @@ #include <linux/nls.h> #include <linux/kernel.h> #include <linux/errno.h> -#ifdef CONFIG_KMOD #include <linux/kmod.h> -#endif #include <linux/spinlock.h> static struct nls_table default_table; @@ -215,24 +213,7 @@ static struct nls_table *find_nls(char *charset) struct nls_table *load_nls(char *charset) { - struct nls_table *nls; -#ifdef CONFIG_KMOD - int ret; -#endif - - nls = find_nls(charset); - if (nls) - return nls; - -#ifdef CONFIG_KMOD - ret = request_module("nls_%s", charset); - if (ret != 0) { - printk("Unable to load NLS charset %s\n", charset); - return NULL; - } - nls = find_nls(charset); -#endif - return nls; + return try_then_request_module(find_nls(charset), "nls_%s", charset); } void unload_nls(struct nls_table *nls) diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 3d3e1663147..a97b477ac0f 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c @@ -275,16 +275,6 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) id = data[0x1fc] & 15; put_dev_sector(sect); -#ifdef CONFIG_BLK_DEV_MFM - if (MAJOR(bdev->bd_dev) == MFM_ACORN_MAJOR) { - extern void xd_set_geometry(struct block_device *, - unsigned char, unsigned char, unsigned int); - xd_set_geometry(bdev, dr->secspertrack, heads, 1); - invalidate_bh_lrus(); - truncate_inode_pages(bdev->bd_inode->i_mapping, 0); - } -#endif - /* * Work out start of non-adfs partition. */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7408227c49c..fbeb2f372a9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -538,10 +538,23 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) sector_t from = state->parts[p].from; if (!size) continue; + if (from >= get_capacity(disk)) { + printk(KERN_WARNING + "%s: p%d ignored, start %llu is behind the end of the disk\n", + disk->disk_name, p, (unsigned long long) from); + continue; + } if (from + size > get_capacity(disk)) { + /* + * we can not ignore partitions of broken tables + * created by for example camera firmware, but we + * limit them to the end of the disk to avoid + * creating invalid block devices + */ printk(KERN_WARNING - "%s: p%d exceeds device capacity\n", - disk->disk_name, p); + "%s: p%d size %llu limited to end of disk\n", + disk->disk_name, p, (unsigned long long) size); + size = get_capacity(disk) - from; } res = add_partition(disk, p, from, size, state->parts[p].flags); if (res) { diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index b675a49c182..59ea42e1ef0 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -45,7 +45,6 @@ #include <linux/blkdev.h> #include <linux/hugetlb.h> #include <linux/jiffies.h> -#include <linux/sysrq.h> #include <linux/vmalloc.h> #include <linux/crash_dump.h> #include <linux/pid_namespace.h> @@ -704,28 +703,6 @@ static int execdomains_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -#ifdef CONFIG_MAGIC_SYSRQ -/* - * writing 'C' to /proc/sysrq-trigger is like sysrq-C - */ -static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - if (count) { - char c; - - if (get_user(c, buf)) - return -EFAULT; - __handle_sysrq(c, NULL, 0); - } - return count; -} - -static const struct file_operations proc_sysrq_trigger_operations = { - .write = write_sysrq_trigger, -}; -#endif - #ifdef CONFIG_PROC_PAGE_MONITOR #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) @@ -934,7 +911,4 @@ void __init proc_misc_init(void) #ifdef CONFIG_PROC_VMCORE proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); #endif -#ifdef CONFIG_MAGIC_SYSRQ - proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); -#endif } diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index b9dbeeca704..37173fa07d1 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -8,8 +8,6 @@ /* proc info support a la one created by Sizif@Botik.RU for PGC */ -/* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */ - #include <linux/module.h> #include <linux/time.h> #include <linux/seq_file.h> @@ -621,7 +619,6 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, #endif /* - * $Log: procfs.c,v $ * Revision 1.1.8.2 2001/07/15 17:08:42 god * . use get_super() in procfs.c * . remove remove_save_link() from reiserfs_do_truncate() diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index bb3cb5b7cdb..ad92461cbfc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, xadir = open_xa_dir(inode, flags); if (IS_ERR(xadir)) { return ERR_CAST(xadir); - } else if (xadir && !xadir->d_inode) { + } else if (!xadir->d_inode) { dput(xadir); return ERR_PTR(-ENODATA); } diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 006fc64227d..66f6e58a7e4 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -61,6 +61,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) int size = dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; if (size) { if (offs > size) @@ -69,23 +70,33 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) count = size - offs; } + temp = kmalloc(count, GFP_KERNEL); + if (!temp) + return -ENOMEM; + mutex_lock(&bb->mutex); count = fill_read(dentry, bb->buffer, offs, count); - if (count < 0) - goto out_unlock; + if (count < 0) { + mutex_unlock(&bb->mutex); + goto out_free; + } - if (copy_to_user(userbuf, bb->buffer, count)) { + memcpy(temp, bb->buffer, count); + + mutex_unlock(&bb->mutex); + + if (copy_to_user(userbuf, temp, count)) { count = -EFAULT; - goto out_unlock; + goto out_free; } pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); *off = offs + count; - out_unlock: - mutex_unlock(&bb->mutex); + out_free: + kfree(temp); return count; } @@ -118,6 +129,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, int size = dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; if (size) { if (offs > size) @@ -126,19 +138,27 @@ static ssize_t write(struct file *file, const char __user *userbuf, count = size - offs; } - mutex_lock(&bb->mutex); + temp = kmalloc(count, GFP_KERNEL); + if (!temp) + return -ENOMEM; - if (copy_from_user(bb->buffer, userbuf, count)) { + if (copy_from_user(temp, userbuf, count)) { count = -EFAULT; - goto out_unlock; + goto out_free; } + mutex_lock(&bb->mutex); + + memcpy(bb->buffer, temp, count); + count = flush_write(dentry, bb->buffer, offs, count); + mutex_unlock(&bb->mutex); + if (count > 0) *off = offs + count; - out_unlock: - mutex_unlock(&bb->mutex); +out_free: + kfree(temp); return count; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aedaeba82ae..3a05a596e3b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -370,17 +370,17 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, memset(acxt, 0, sizeof(*acxt)); acxt->parent_sd = parent_sd; - /* Lookup parent inode. inode initialization and I_NEW - * clearing are protected by sysfs_mutex. By grabbing it and - * looking up with _nowait variant, inode state can be - * determined reliably. + /* Lookup parent inode. inode initialization is protected by + * sysfs_mutex, so inode existence can be determined by + * looking up inode while holding sysfs_mutex. */ mutex_lock(&sysfs_mutex); - inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, - parent_sd); + inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, + parent_sd); + if (inode) { + WARN_ON(inode->i_state & I_NEW); - if (inode && !(inode->i_state & I_NEW)) { /* parent inode available */ acxt->parent_inode = inode; @@ -393,8 +393,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, mutex_lock(&inode->i_mutex); mutex_lock(&sysfs_mutex); } - } else - iput(inode); + } } /** @@ -636,6 +635,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, return sd; } +EXPORT_SYMBOL_GPL(sysfs_get_dirent); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, const char *name, struct sysfs_dirent **p_sd) @@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) if (!new_dentry) goto out_unlock; - /* rename kobject and sysfs_dirent */ + /* rename sysfs_dirent */ error = -ENOMEM; new_name = dup_name = kstrdup(new_name, GFP_KERNEL); if (!new_name) goto out_unlock; - error = kobject_set_name(kobj, "%s", new_name); - if (error) - goto out_unlock; - dup_name = sd->s_name; sd->s_name = new_name; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index c9e4e5091da..1f4a3f87726 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -19,10 +19,18 @@ #include <linux/poll.h> #include <linux/list.h> #include <linux/mutex.h> +#include <linux/limits.h> #include <asm/uaccess.h> #include "sysfs.h" +/* used in crash dumps to help with debugging */ +static char last_sysfs_file[PATH_MAX]; +void sysfs_printk_last_file(void) +{ + printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); +} + /* * There's one sysfs_buffer for each open file and one * sysfs_open_dirent for each sysfs_dirent with one or more open @@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_buffer *buffer; struct sysfs_ops *ops; int error = -EACCES; + char *p; + + p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); + if (p) + memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active_two(attr_sd)) @@ -440,7 +453,23 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) return POLLERR|POLLPRI; } -void sysfs_notify(struct kobject *k, char *dir, char *attr) +void sysfs_notify_dirent(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + + spin_lock(&sysfs_open_dirent_lock); + + od = sd->s_attr.open; + if (od) { + atomic_inc(&od->event); + wake_up_interruptible(&od->poll); + } + + spin_unlock(&sysfs_open_dirent_lock); +} +EXPORT_SYMBOL_GPL(sysfs_notify_dirent); + +void sysfs_notify(struct kobject *k, const char *dir, const char *attr) { struct sysfs_dirent *sd = k->sd; @@ -450,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr) sd = sysfs_find_dirent(sd, dir); if (sd && attr) sd = sysfs_find_dirent(sd, attr); - if (sd) { - struct sysfs_open_dirent *od; - - spin_lock(&sysfs_open_dirent_lock); - - od = sd->s_attr.open; - if (od) { - atomic_inc(&od->event); - wake_up_interruptible(&od->poll); - } - - spin_unlock(&sysfs_open_dirent_lock); - } + if (sd) + sysfs_notify_dirent(sd); mutex_unlock(&sysfs_mutex); } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 14f0023984d..ab343e371d6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -16,6 +16,7 @@ #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/init.h> +#include <linux/module.h> #include "sysfs.h" @@ -115,3 +116,17 @@ out_err: sysfs_dir_cachep = NULL; goto out; } + +#undef sysfs_get +struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) +{ + return __sysfs_get(sd); +} +EXPORT_SYMBOL_GPL(sysfs_get); + +#undef sysfs_put +void sysfs_put(struct sysfs_dirent *sd) +{ + __sysfs_put(sd); +} +EXPORT_SYMBOL_GPL(sysfs_put); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a5db496f71c..93c6d6b27c4 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd); void sysfs_remove_subdir(struct sysfs_dirent *sd); -static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) +static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { if (sd) { WARN_ON(!atomic_read(&sd->s_count)); @@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) } return sd; } +#define sysfs_get(sd) __sysfs_get(sd) -static inline void sysfs_put(struct sysfs_dirent *sd) +static inline void __sysfs_put(struct sysfs_dirent *sd) { if (sd && atomic_dec_and_test(&sd->s_count)) release_sysfs_dirent(sd); } +#define sysfs_put(sd) __sysfs_put(sd) /* * inode.c |