diff options
Diffstat (limited to 'fs')
66 files changed, 1151 insertions, 533 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index d384a8b77ee..aa5ecf479a5 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -183,7 +183,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) else flock.length = fl->fl_end - fl->fl_start + 1; flock.proc_id = fl->fl_pid; - flock.client_id = utsname()->nodename; + flock.client_id = fid->clnt->name; if (IS_SETLKW(cmd)) flock.flags = P9_LOCK_FLAGS_BLOCK; @@ -260,7 +260,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) else glock.length = fl->fl_end - fl->fl_start + 1; glock.proc_id = fl->fl_pid; - glock.client_id = utsname()->nodename; + glock.client_id = fid->clnt->name; res = p9_client_getlock_dotl(fid, &glock); if (res < 0) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 25b018efb8a..94de6d1482e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -146,7 +146,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, char type = 0, ext[32]; int major = -1, minor = -1; - strncpy(ext, stat->extension, sizeof(ext)); + strlcpy(ext, stat->extension, sizeof(ext)); sscanf(ext, "%c %u %u", &type, &major, &minor); switch (type) { case 'c': @@ -1186,7 +1186,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, * this even with .u extension. So check * for non NULL stat->extension */ - strncpy(ext, stat->extension, sizeof(ext)); + strlcpy(ext, stat->extension, sizeof(ext)); /* HARDLINKCOUNT %u */ sscanf(ext, "%13s %u", tag_name, &i_nlink); if (!strncmp(tag_name, "HARDLINKCOUNT", 13)) diff --git a/fs/affs/file.c b/fs/affs/file.c index af3261b7810..776e3935a75 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -836,7 +836,7 @@ affs_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; int res; res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0b74d3176ab..646337dc520 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -751,10 +751,6 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) _enter("{%x:%u},{%s},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -816,10 +812,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -936,10 +928,6 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, _enter("{%x:%u},{%s},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -1005,10 +993,6 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -1053,10 +1037,6 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, content); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - ret = -EINVAL; if (strlen(content) >= AFSPATHMAX) goto error; @@ -1127,10 +1107,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dvnode->fid.vid, new_dvnode->fid.vnode, new_dentry->d_name.name); - ret = -ENAMETOOLONG; - if (new_dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(orig_dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 743c7c2c949..0f00da329e7 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -183,13 +183,14 @@ static int autofs_dev_ioctl_protosubver(struct file *fp, return 0; } +/* Find the topmost mount satisfying test() */ static int find_autofs_mount(const char *pathname, struct path *res, int test(struct path *path, void *data), void *data) { struct path path; - int err = kern_path(pathname, 0, &path); + int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); if (err) return err; err = -ENOENT; @@ -197,10 +198,9 @@ static int find_autofs_mount(const char *pathname, if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) { if (test(&path, data)) { path_get(&path); - if (!err) /* already found some */ - path_put(res); *res = path; err = 0; + break; } } if (!follow_up(&path)) @@ -486,12 +486,11 @@ static int autofs_dev_ioctl_askumount(struct file *fp, * mount if there is one or 0 if it isn't a mountpoint. * * If we aren't supplied with a file descriptor then we - * lookup the nameidata of the path and check if it is the - * root of a mount. If a type is given we are looking for - * a particular autofs mount and if we don't find a match - * we return fail. If the located nameidata path is the - * root of a mount we return 1 along with the super magic - * of the mount or 0 otherwise. + * lookup the path and check if it is the root of a mount. + * If a type is given we are looking for a particular autofs + * mount and if we don't find a match we return fail. If the + * located path is the root of a mount we return 1 along with + * the super magic of the mount or 0 otherwise. * * In both cases the the device number (as returned by * new_encode_dev()) is also returned. @@ -519,9 +518,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, if (!fp || param->ioctlfd == -1) { if (autofs_type_any(type)) - err = kern_path(name, LOOKUP_FOLLOW, &path); + err = kern_path_mountpoint(AT_FDCWD, + name, &path, LOOKUP_FOLLOW); else - err = find_autofs_mount(name, &path, test_by_type, &type); + err = find_autofs_mount(name, &path, + test_by_type, &type); if (err) goto out; devid = new_encode_dev(path.dentry->d_sb->s_dev); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916d8a..60250847929 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -716,13 +716,14 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size) return 0; bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); - - bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); - if (!bs->bvec_integrity_pool) + if (!bs->bio_integrity_pool) return -1; - if (!bs->bio_integrity_pool) + bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_integrity_pool) { + mempool_destroy(bs->bio_integrity_pool); return -1; + } return 0; } diff --git a/fs/coredump.c b/fs/coredump.c index 72f816d6cad..9bdeca12ae0 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -190,6 +190,11 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) err = cn_printf(cn, "%d", task_tgid_vnr(current)); break; + /* global pid */ + case 'P': + err = cn_printf(cn, "%d", + task_tgid_nr(current)); + break; /* uid */ case 'u': err = cn_printf(cn, "%d", cred->uid); diff --git a/fs/dcache.c b/fs/dcache.c index ca8e9cd60f8..4d9df3c940e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * lock: sequence lock + * seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + write_seqlock(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + write_sequnlock(lock); +} + /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -1012,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb) * the parenthood after dropping the lock and check * that the sequence number still matches. */ -static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) +static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) { struct dentry *new = old->d_parent; @@ -1026,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq */ if (new != old->d_parent || (old->d_flags & DCACHE_DENTRY_KILLED) || - (!locked && read_seqretry(&rename_lock, seq))) { + need_seqretry(&rename_lock, seq)) { spin_unlock(&new->d_lock); new = NULL; } @@ -1063,13 +1092,12 @@ static void d_walk(struct dentry *parent, void *data, { struct dentry *this_parent; struct list_head *next; - unsigned seq; - int locked = 0; + unsigned seq = 0; enum d_walk_ret ret; bool retry = true; - seq = read_seqbegin(&rename_lock); again: + read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; spin_lock(&this_parent->d_lock); @@ -1123,13 +1151,13 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); + this_parent = try_to_ascend(this_parent, seq); if (!this_parent) goto rename_retry; next = child->d_u.d_child.next; goto resume; } - if (!locked && read_seqretry(&rename_lock, seq)) { + if (need_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); goto rename_retry; } @@ -1138,17 +1166,13 @@ resume: out_unlock: spin_unlock(&this_parent->d_lock); - if (locked) - write_sequnlock(&rename_lock); + done_seqretry(&rename_lock, seq); return; rename_retry: if (!retry) return; - if (locked) - goto again; - locked = 1; - write_seqlock(&rename_lock); + seq = 1; goto again; } @@ -2647,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) return 0; } +/** + * prepend_name - prepend a pathname in front of current buffer pointer + * buffer: buffer pointer + * buflen: allocated length of the buffer + * name: name string and length qstr structure + * + * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to + * make sure that either the old or the new name pointer and length are + * fetched. However, there may be mismatch between length and pointer. + * The length cannot be trusted, we need to copy it byte-by-byte until + * the length is reached or a null byte is found. It also prepends "/" at + * the beginning of the name. The sequence number check at the caller will + * retry it again when a d_move() does happen. So any garbage in the buffer + * due to mismatched pointer and length will be discarded. + */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { - return prepend(buffer, buflen, name->name, name->len); + const char *dname = ACCESS_ONCE(name->name); + u32 dlen = ACCESS_ONCE(name->len); + char *p; + + if (*buflen < dlen + 1) + return -ENAMETOOLONG; + *buflen -= dlen + 1; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + if (!c) + break; + *p++ = c; + } + return 0; } /** @@ -2659,7 +2713,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the rename_lock. + * The function tries to write out the pathname without taking any lock other + * than the RCU read lock to make sure that dentries won't go away. It only + * checks the sequence number of the global rename_lock as any change in the + * dentry's d_seq will be preceded by changes in the rename_lock sequence + * number. If the sequence number had been change, it will restart the whole + * pathname back-tracing sequence again. It performs a total of 3 trials of + * lockless back-tracing sequences before falling back to take the + * rename_lock. */ static int prepend_path(const struct path *path, const struct path *root, @@ -2668,54 +2729,66 @@ static int prepend_path(const struct path *path, struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; struct mount *mnt = real_mount(vfsmnt); - bool slash = false; int error = 0; + unsigned seq = 0; + char *bptr; + int blen; + rcu_read_lock(); +restart: + bptr = *buffer; + blen = *buflen; + read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - if (!mnt_has_parent(mnt)) - goto global_root; - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; - vfsmnt = &mnt->mnt; - continue; + if (mnt_has_parent(mnt)) { + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + vfsmnt = &mnt->mnt; + continue; + } + /* + * Filesystems needing to implement special "root names" + * should do so with ->d_dname() + */ + if (IS_ROOT(dentry) && + (dentry->d_name.len != 1 || + dentry->d_name.name[0] != '/')) { + WARN(1, "Root dentry has weird name <%.*s>\n", + (int) dentry->d_name.len, + dentry->d_name.name); + } + if (!error) + error = is_mounted(vfsmnt) ? 1 : 2; + break; } parent = dentry->d_parent; prefetch(parent); - spin_lock(&dentry->d_lock); - error = prepend_name(buffer, buflen, &dentry->d_name); - spin_unlock(&dentry->d_lock); - if (!error) - error = prepend(buffer, buflen, "/", 1); + error = prepend_name(&bptr, &blen, &dentry->d_name); if (error) break; - slash = true; dentry = parent; } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); - if (!error && !slash) - error = prepend(buffer, buflen, "/", 1); - - return error; - -global_root: - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, dentry->d_name.name); - } - if (!slash) - error = prepend(buffer, buflen, "/", 1); - if (!error) - error = is_mounted(vfsmnt) ? 1 : 2; + if (error >= 0 && bptr == *buffer) { + if (--blen < 0) + error = -ENAMETOOLONG; + else + *--bptr = '/'; + } + *buffer = bptr; + *buflen = blen; return error; } @@ -2744,9 +2817,7 @@ char *__d_path(const struct path *path, prepend(&res, &buflen, "\0", 1); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) @@ -2765,9 +2836,7 @@ char *d_absolute_path(const struct path *path, prepend(&res, &buflen, "\0", 1); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = prepend_path(path, &root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error > 1) @@ -2833,9 +2902,7 @@ char *d_path(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = path_with_deleted(path, &root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) res = ERR_PTR(error); @@ -2870,10 +2937,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) char *end = buffer + buflen; /* these dentries are never renamed, so d_lock is not needed */ if (prepend(&end, &buflen, " (deleted)", 11) || - prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || prepend(&end, &buflen, "/", 1)) end = ERR_PTR(-ENAMETOOLONG); - return end; + return end; } /* @@ -2881,30 +2948,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) */ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { - char *end = buf + buflen; - char *retval; + char *end, *retval; + int len, seq = 0; + int error = 0; - prepend(&end, &buflen, "\0", 1); + rcu_read_lock(); +restart: + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); if (buflen < 1) goto Elong; /* Get '/' right */ retval = end-1; *retval = '/'; - + read_seqbegin_or_lock(&rename_lock, &seq); while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; int error; prefetch(parent); - spin_lock(&dentry->d_lock); - error = prepend_name(&end, &buflen, &dentry->d_name); - spin_unlock(&dentry->d_lock); - if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) - goto Elong; + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; retval = end; dentry = parent; } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; return retval; Elong: return ERR_PTR(-ENAMETOOLONG); @@ -2912,13 +2991,7 @@ Elong: char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) { - char *retval; - - write_seqlock(&rename_lock); - retval = __dentry_path(dentry, buf, buflen); - write_sequnlock(&rename_lock); - - return retval; + return __dentry_path(dentry, buf, buflen); } EXPORT_SYMBOL(dentry_path_raw); @@ -2927,7 +3000,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *p = NULL; char *retval; - write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2935,7 +3007,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); - write_sequnlock(&rename_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; @@ -2974,7 +3045,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -ENOENT; br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PAGE_SIZE; @@ -2982,7 +3052,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) @@ -3003,7 +3072,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 293f86741dd..473e09da7d0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -740,6 +740,7 @@ static void ep_free(struct eventpoll *ep) epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); + cond_resched(); } /* @@ -754,6 +755,7 @@ static void ep_free(struct eventpoll *ep) while ((rbp = rb_first(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); + cond_resched(); } mutex_unlock(&ep->mtx); diff --git a/fs/exec.c b/fs/exec.c index fd774c7cb48..8875dd10ae7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -74,6 +74,8 @@ static DEFINE_RWLOCK(binfmt_lock); void __register_binfmt(struct linux_binfmt * fmt, int insert) { BUG_ON(!fmt); + if (WARN_ON(!fmt->load_binary)) + return; write_lock(&binfmt_lock); insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); @@ -266,7 +268,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; + vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); @@ -1365,18 +1367,18 @@ out: } EXPORT_SYMBOL(remove_arg_zero); +#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) /* * cycle the list of binary formats handler, until one recognizes the image */ int search_binary_handler(struct linux_binprm *bprm) { - unsigned int depth = bprm->recursion_depth; - int try,retval; + bool need_retry = IS_ENABLED(CONFIG_MODULES); struct linux_binfmt *fmt; - pid_t old_pid, old_vpid; + int retval; /* This allows 4 levels of binfmt rewrites before failing hard. */ - if (depth > 5) + if (bprm->recursion_depth > 5) return -ELOOP; retval = security_bprm_check(bprm); @@ -1387,71 +1389,67 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; + retval = -ENOENT; + retry: + read_lock(&binfmt_lock); + list_for_each_entry(fmt, &formats, lh) { + if (!try_module_get(fmt->module)) + continue; + read_unlock(&binfmt_lock); + bprm->recursion_depth++; + retval = fmt->load_binary(bprm); + bprm->recursion_depth--; + if (retval >= 0 || retval != -ENOEXEC || + bprm->mm == NULL || bprm->file == NULL) { + put_binfmt(fmt); + return retval; + } + read_lock(&binfmt_lock); + put_binfmt(fmt); + } + read_unlock(&binfmt_lock); + + if (need_retry && retval == -ENOEXEC) { + if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && + printable(bprm->buf[2]) && printable(bprm->buf[3])) + return retval; + if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) + return retval; + need_retry = false; + goto retry; + } + + return retval; +} +EXPORT_SYMBOL(search_binary_handler); + +static int exec_binprm(struct linux_binprm *bprm) +{ + pid_t old_pid, old_vpid; + int ret; + /* Need to fetch pid before load_binary changes it */ old_pid = current->pid; rcu_read_lock(); old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); - retval = -ENOENT; - for (try=0; try<2; try++) { - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - int (*fn)(struct linux_binprm *) = fmt->load_binary; - if (!fn) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - bprm->recursion_depth = depth + 1; - retval = fn(bprm); - bprm->recursion_depth = depth; - if (retval >= 0) { - if (depth == 0) { - trace_sched_process_exec(current, old_pid, bprm); - ptrace_event(PTRACE_EVENT_EXEC, old_vpid); - } - put_binfmt(fmt); - allow_write_access(bprm->file); - if (bprm->file) - fput(bprm->file); - bprm->file = NULL; - current->did_exec = 1; - proc_exec_connector(current); - return retval; - } - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (retval != -ENOEXEC || bprm->mm == NULL) - break; - if (!bprm->file) { - read_unlock(&binfmt_lock); - return retval; - } + ret = search_binary_handler(bprm); + if (ret >= 0) { + trace_sched_process_exec(current, old_pid, bprm); + ptrace_event(PTRACE_EVENT_EXEC, old_vpid); + current->did_exec = 1; + proc_exec_connector(current); + + if (bprm->file) { + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; /* to catch use-after-free */ } - read_unlock(&binfmt_lock); -#ifdef CONFIG_MODULES - if (retval != -ENOEXEC || bprm->mm == NULL) { - break; - } else { -#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) - if (printable(bprm->buf[0]) && - printable(bprm->buf[1]) && - printable(bprm->buf[2]) && - printable(bprm->buf[3])) - break; /* -ENOEXEC */ - if (try) - break; /* -ENOEXEC */ - request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); - } -#else - break; -#endif } - return retval; -} -EXPORT_SYMBOL(search_binary_handler); + return ret; +} /* * sys_execve() executes a new program. @@ -1541,7 +1539,7 @@ static int do_execve_common(const char *filename, if (retval < 0) goto out; - retval = search_binary_handler(bprm); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 293bc2e47a7..a235f001688 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -231,7 +231,7 @@ static int filldir_one(void * __buf, const char * name, int len, int result = 0; buf->sequence++; - if (buf->ino == ino) { + if (buf->ino == ino && len <= NAME_MAX) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; diff --git a/fs/file_table.c b/fs/file_table.c index 322cd37626c..abdd15ad13c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -311,8 +311,7 @@ void fput(struct file *file) return; /* * After this task has run exit_task_work(), - * task_work_add() will fail. free_ipc_ns()-> - * shm_destroy() can do this. Fall through to delayed + * task_work_add() will fail. Fall through to delayed * fput to avoid leaking *file. */ } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68851ff2fd4..30f6f27d5a5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb, return wrote; } -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, +static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { @@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) { struct backing_dev_info *bdi; - if (!nr_pages) { - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - } + if (!nr_pages) + nr_pages = get_nr_dirty_pages(); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { @@ -1173,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) bool wakeup_bdi = false; bdi = inode_to_bdi(inode); + spin_unlock(&inode->i_lock); + spin_lock(&bdi->wb.list_lock); if (bdi_cap_writeback_dirty(bdi)) { WARN(!test_bit(BDI_registered, &bdi->state), "bdi-%s not registered\n", bdi->name); @@ -1187,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = true; } - spin_unlock(&inode->i_lock); - spin_lock(&bdi->wb.list_lock); inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &bdi->wb.b_dirty); spin_unlock(&bdi->wb.list_lock); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 8702b732109..73899c1c344 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -913,7 +913,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, (1 << FSCACHE_OP_WAITING) | (1 << FSCACHE_OP_UNUSE_COOKIE); - ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) goto nomem_free; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e0fe703ee3d..84434594e80 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -930,7 +930,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) fc->bdi.name = "fuse"; fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; err = bdi_init(&fc->bdi); if (err) diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig index a63371815aa..24bc20fd42f 100644 --- a/fs/hfsplus/Kconfig +++ b/fs/hfsplus/Kconfig @@ -11,3 +11,21 @@ config HFSPLUS_FS MacOS 8. It includes all Mac specific filesystem data such as data forks and creator codes, but it also has several UNIX style features such as file ownership and permissions. + +config HFSPLUS_FS_POSIX_ACL + bool "HFS+ POSIX Access Control Lists" + depends on HFSPLUS_FS + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + It needs to understand that POSIX ACLs are treated only under + Linux. POSIX ACLs doesn't mean something under Mac OS X. + Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs, + which are part of the NFSv4 standard. + + If you don't know what Access Control Lists are, say N diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile index 09d278bb7b9..683fca2e5e6 100644 --- a/fs/hfsplus/Makefile +++ b/fs/hfsplus/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o + +hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h new file mode 100644 index 00000000000..07c0d494752 --- /dev/null +++ b/fs/hfsplus/acl.h @@ -0,0 +1,30 @@ +/* + * linux/fs/hfsplus/acl.h + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for Posix Access Control Lists (ACLs) support. + */ + +#include <linux/posix_acl_xattr.h> + +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + +/* posix_acl.c */ +struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type); +extern int hfsplus_posix_acl_chmod(struct inode *); +extern int hfsplus_init_posix_acl(struct inode *, struct inode *); + +#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */ +#define hfsplus_get_posix_acl NULL + +static inline int hfsplus_posix_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index d8ce4bd17fc..4a4fea00267 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -16,6 +16,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" #include "xattr.h" +#include "acl.h" static inline void hfsplus_instantiate(struct dentry *dentry, struct inode *inode, u32 cnid) @@ -529,6 +530,9 @@ const struct inode_operations hfsplus_dir_inode_operations = { .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, .removexattr = hfsplus_removexattr, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + .get_acl = hfsplus_get_posix_acl, +#endif }; const struct file_operations hfsplus_dir_operations = { diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index ede79317cfb..2b9cd01696e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -30,6 +30,7 @@ #define DBG_EXTENT 0x00000020 #define DBG_BITMAP 0x00000040 #define DBG_ATTR_MOD 0x00000080 +#define DBG_ACL_MOD 0x00000100 #if 0 #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index f833d35630a..4d2edaea891 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -19,6 +19,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" #include "xattr.h" +#include "acl.h" static int hfsplus_readpage(struct file *file, struct page *page) { @@ -316,6 +317,13 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) setattr_copy(inode, attr); mark_inode_dirty(inode); + + if (attr->ia_valid & ATTR_MODE) { + error = hfsplus_posix_acl_chmod(inode); + if (unlikely(error)) + return error; + } + return 0; } @@ -383,6 +391,9 @@ static const struct inode_operations hfsplus_file_inode_operations = { .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, .removexattr = hfsplus_removexattr, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + .get_acl = hfsplus_get_posix_acl, +#endif }; static const struct file_operations hfsplus_file_operations = { diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c new file mode 100644 index 00000000000..b609cc14c72 --- /dev/null +++ b/fs/hfsplus/posix_acl.c @@ -0,0 +1,274 @@ +/* + * linux/fs/hfsplus/posix_acl.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for Posix Access Control Lists (ACLs) support. + */ + +#include "hfsplus_fs.h" +#include "xattr.h" +#include "acl.h" + +struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + char *xattr_name; + char *value = NULL; + ssize_t size; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + xattr_name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + xattr_name = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + size = __hfsplus_getxattr(inode, xattr_name, NULL, 0); + + if (size > 0) { + value = (char *)hfsplus_alloc_attr_entry(); + if (unlikely(!value)) + return ERR_PTR(-ENOMEM); + size = __hfsplus_getxattr(inode, xattr_name, value, size); + } + + if (size > 0) + acl = posix_acl_from_xattr(&init_user_ns, value, size); + else if (size == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(size); + + hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int hfsplus_set_posix_acl(struct inode *inode, + int type, + struct posix_acl *acl) +{ + int err; + char *xattr_name; + size_t size = 0; + char *value = NULL; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + xattr_name = POSIX_ACL_XATTR_ACCESS; + if (acl) { + err = posix_acl_equiv_mode(acl, &inode->i_mode); + if (err < 0) + return err; + } + err = 0; + break; + + case ACL_TYPE_DEFAULT: + xattr_name = POSIX_ACL_XATTR_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE)) + return -ENOMEM; + value = (char *)hfsplus_alloc_attr_entry(); + if (unlikely(!value)) + return -ENOMEM; + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (unlikely(err < 0)) + goto end_set_acl; + } + + err = __hfsplus_setxattr(inode, xattr_name, value, size, 0); + +end_set_acl: + hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); + + if (!err) + set_cached_acl(inode, type, acl); + + return err; +} + +int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) +{ + int err = 0; + struct posix_acl *acl = NULL; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, dir->ino %lu\n", + __func__, inode->i_ino, dir->i_ino); + + if (S_ISLNK(inode->i_mode)) + return 0; + + acl = hfsplus_get_posix_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + if (S_ISDIR(inode->i_mode)) { + err = hfsplus_set_posix_acl(inode, + ACL_TYPE_DEFAULT, + acl); + if (unlikely(err)) + goto init_acl_cleanup; + } + + err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (unlikely(err < 0)) + return err; + + if (err > 0) + err = hfsplus_set_posix_acl(inode, + ACL_TYPE_ACCESS, + acl); + } else + inode->i_mode &= ~current_umask(); + +init_acl_cleanup: + posix_acl_release(acl); + return err; +} + +int hfsplus_posix_acl_chmod(struct inode *inode) +{ + int err; + struct posix_acl *acl; + + hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino); + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = hfsplus_get_posix_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + err = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (unlikely(err)) + return err; + + err = hfsplus_set_posix_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return err; +} + +static int hfsplus_xattr_get_posix_acl(struct dentry *dentry, + const char *name, + void *buffer, + size_t size, + int type) +{ + int err = 0; + struct posix_acl *acl; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, buffer %p, size %zu, type %#x\n", + __func__, dentry->d_inode->i_ino, buffer, size, type); + + if (strcmp(name, "") != 0) + return -EINVAL; + + acl = hfsplus_get_posix_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return err; +} + +static int hfsplus_xattr_set_posix_acl(struct dentry *dentry, + const char *name, + const void *value, + size_t size, + int flags, + int type) +{ + int err = 0; + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, value %p, size %zu, flags %#x, type %#x\n", + __func__, inode->i_ino, value, size, flags, type); + + if (strcmp(name, "") != 0) + return -EINVAL; + + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + err = posix_acl_valid(acl); + if (err) + goto end_xattr_set_acl; + } + } + + err = hfsplus_set_posix_acl(inode, type, acl); + +end_xattr_set_acl: + posix_acl_release(acl); + return err; +} + +static size_t hfsplus_xattr_list_posix_acl(struct dentry *dentry, + char *list, + size_t list_size, + const char *name, + size_t name_len, + int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = hfsplus_xattr_list_posix_acl, + .get = hfsplus_xattr_get_posix_acl, + .set = hfsplus_xattr_set_posix_acl, +}; + +const struct xattr_handler hfsplus_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = hfsplus_xattr_list_posix_acl, + .get = hfsplus_xattr_get_posix_acl, + .set = hfsplus_xattr_set_posix_acl, +}; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index f66346155df..bd8471fb9a6 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -8,11 +8,16 @@ #include "hfsplus_fs.h" #include "xattr.h" +#include "acl.h" const struct xattr_handler *hfsplus_xattr_handlers[] = { &hfsplus_xattr_osx_handler, &hfsplus_xattr_user_handler, &hfsplus_xattr_trusted_handler, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + &hfsplus_xattr_acl_access_handler, + &hfsplus_xattr_acl_default_handler, +#endif &hfsplus_xattr_security_handler, NULL }; @@ -46,11 +51,58 @@ static inline int is_known_namespace(const char *name) return true; } +static int can_set_system_xattr(struct inode *inode, const char *name, + const void *value, size_t size) +{ +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + struct posix_acl *acl; + int err; + + if (!inode_owner_or_capable(inode)) + return -EPERM; + + /* + * POSIX_ACL_XATTR_ACCESS is tied to i_mode + */ + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + err = posix_acl_equiv_mode(acl, &inode->i_mode); + posix_acl_release(acl); + if (err < 0) + return err; + mark_inode_dirty(inode); + } + /* + * We're changing the ACL. Get rid of the cached one + */ + forget_cached_acl(inode, ACL_TYPE_ACCESS); + + return 0; + } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + posix_acl_release(acl); + + /* + * We're changing the default ACL. Get rid of the cached one + */ + forget_cached_acl(inode, ACL_TYPE_DEFAULT); + + return 0; + } +#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ + return -EOPNOTSUPP; +} + static int can_set_xattr(struct inode *inode, const char *name, const void *value, size_t value_len) { if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return -EOPNOTSUPP; /* TODO: implement ACL support */ + return can_set_system_xattr(inode, name, value, value_len); if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { /* @@ -253,11 +305,10 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len) return len; } -static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, +static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, void *value, size_t size) { ssize_t res = 0; - struct inode *inode = dentry->d_inode; struct hfs_find_data fd; u16 entry_type; u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); @@ -304,10 +355,9 @@ end_getxattr_finder_info: return res; } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct inode *inode = dentry->d_inode; struct hfs_find_data fd; hfsplus_attr_entry *entry; __be32 xattr_record_type; @@ -333,7 +383,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, } if (!strcmp_xattr_finder_info(name)) - return hfsplus_getxattr_finder_info(dentry, value, size); + return hfsplus_getxattr_finder_info(inode, value, size); if (!HFSPLUS_SB(inode->i_sb)->attr_tree) return -EOPNOTSUPP; diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index 847b695b984..841b5698c0f 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h @@ -14,8 +14,8 @@ extern const struct xattr_handler hfsplus_xattr_osx_handler; extern const struct xattr_handler hfsplus_xattr_user_handler; extern const struct xattr_handler hfsplus_xattr_trusted_handler; -/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ -/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ +extern const struct xattr_handler hfsplus_xattr_acl_access_handler; +extern const struct xattr_handler hfsplus_xattr_acl_default_handler; extern const struct xattr_handler hfsplus_xattr_security_handler; extern const struct xattr_handler *hfsplus_xattr_handlers[]; @@ -29,9 +29,17 @@ static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size); +static inline ssize_t hfsplus_getxattr(struct dentry *dentry, + const char *name, + void *value, + size_t size) +{ + return __hfsplus_getxattr(dentry->d_inode, name, value, size); +} + ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); int hfsplus_removexattr(struct dentry *dentry, const char *name); @@ -39,22 +47,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name); int hfsplus_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr); -static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) -{ - /*TODO: implement*/ - return 0; -} - -static inline int hfsplus_init_inode_security(struct inode *inode, - struct inode *dir, - const struct qstr *qstr) -{ - int err; - - err = hfsplus_init_acl(inode, dir); - if (!err) - err = hfsplus_init_security(inode, dir, qstr); - return err; -} +int hfsplus_init_inode_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); #endif diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 83b842f113c..00722765ea7 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -9,6 +9,7 @@ #include <linux/security.h> #include "hfsplus_fs.h" #include "xattr.h" +#include "acl.h" static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) @@ -96,6 +97,18 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir, &hfsplus_initxattrs, NULL); } +int hfsplus_init_inode_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr) +{ + int err; + + err = hfsplus_init_posix_acl(inode, dir); + if (!err) + err = hfsplus_init_security(inode, dir, qstr); + return err; +} + const struct xattr_handler hfsplus_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = hfsplus_security_listxattr, diff --git a/fs/internal.h b/fs/internal.h index d2089379552..2be46ea5dd0 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -45,6 +45,9 @@ extern void __init chrdev_init(void); * namei.c */ extern int __inode_permission(struct inode *, int); +extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index 56e4f4d537d..409a441ba2a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -574,9 +574,12 @@ unlock_and_drop_dentry: drop_dentry: unlock_rcu_walk(); dput(dentry); - return -ECHILD; + goto drop_root_mnt; out: unlock_rcu_walk(); +drop_root_mnt: + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; return -ECHILD; } @@ -2206,7 +2209,7 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd, } /** - * umount_lookup_last - look up last component for umount + * mountpoint_last - look up last component for umount * @nd: pathwalk nameidata - currently pointing at parent directory of "last" * @path: pointer to container for result * @@ -2233,25 +2236,28 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd, * to the link, and nd->path will *not* be put. */ static int -umount_lookup_last(struct nameidata *nd, struct path *path) +mountpoint_last(struct nameidata *nd, struct path *path) { int error = 0; struct dentry *dentry; struct dentry *dir = nd->path.dentry; - if (unlikely(nd->flags & LOOKUP_RCU)) { - WARN_ON_ONCE(1); - error = -ECHILD; - goto error_check; + /* If we're in rcuwalk, drop out of it to handle last component */ + if (nd->flags & LOOKUP_RCU) { + if (unlazy_walk(nd, NULL)) { + error = -ECHILD; + goto out; + } } nd->flags &= ~LOOKUP_PARENT; if (unlikely(nd->last_type != LAST_NORM)) { error = handle_dots(nd, nd->last_type); - if (!error) - dentry = dget(nd->path.dentry); - goto error_check; + if (error) + goto out; + dentry = dget(nd->path.dentry); + goto done; } mutex_lock(&dir->d_inode->i_mutex); @@ -2265,44 +2271,43 @@ umount_lookup_last(struct nameidata *nd, struct path *path) dentry = d_alloc(dir, &nd->last); if (!dentry) { error = -ENOMEM; - } else { - dentry = lookup_real(dir->d_inode, dentry, nd->flags); - if (IS_ERR(dentry)) - error = PTR_ERR(dentry); + goto out; } + dentry = lookup_real(dir->d_inode, dentry, nd->flags); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out; } mutex_unlock(&dir->d_inode->i_mutex); -error_check: - if (!error) { - if (!dentry->d_inode) { - error = -ENOENT; - dput(dentry); - } else { - path->dentry = dentry; - path->mnt = mntget(nd->path.mnt); - if (should_follow_link(dentry->d_inode, - nd->flags & LOOKUP_FOLLOW)) - return 1; - follow_mount(path); - } +done: + if (!dentry->d_inode) { + error = -ENOENT; + dput(dentry); + goto out; } + path->dentry = dentry; + path->mnt = mntget(nd->path.mnt); + if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) + return 1; + follow_mount(path); + error = 0; +out: terminate_walk(nd); return error; } /** - * path_umountat - look up a path to be umounted + * path_mountpoint - look up a path to be umounted * @dfd: directory file descriptor to start walk from * @name: full pathname to walk * @flags: lookup flags - * @nd: pathwalk nameidata * * Look up the given name, but don't attempt to revalidate the last component. * Returns 0 and "path" will be valid on success; Retuns error otherwise. */ static int -path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) +path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) { struct file *base = NULL; struct nameidata nd; @@ -2317,16 +2322,7 @@ path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) if (err) goto out; - /* If we're in rcuwalk, drop out of it to handle last component */ - if (nd.flags & LOOKUP_RCU) { - err = unlazy_walk(&nd, NULL); - if (err) { - terminate_walk(&nd); - goto out; - } - } - - err = umount_lookup_last(&nd, path); + err = mountpoint_last(&nd, path); while (err > 0) { void *cookie; struct path link = *path; @@ -2337,7 +2333,7 @@ path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) err = follow_link(&link, &nd, &cookie); if (err) break; - err = umount_lookup_last(&nd, path); + err = mountpoint_last(&nd, path); put_link(&nd, &link, cookie); } out: @@ -2350,8 +2346,22 @@ out: return err; } +static int +filename_mountpoint(int dfd, struct filename *s, struct path *path, + unsigned int flags) +{ + int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + if (unlikely(error == -ECHILD)) + error = path_mountpoint(dfd, s->name, path, flags); + if (unlikely(error == -ESTALE)) + error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); + if (likely(!error)) + audit_inode(s, path->dentry, 0); + return error; +} + /** - * user_path_umountat - lookup a path from userland in order to umount it + * user_path_mountpoint_at - lookup a path from userland in order to umount it * @dfd: directory file descriptor * @name: pathname from userland * @flags: lookup flags @@ -2365,28 +2375,27 @@ out: * Returns 0 and populates "path" on success. */ int -user_path_umountat(int dfd, const char __user *name, unsigned int flags, +user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags, struct path *path) { struct filename *s = getname(name); int error; - if (IS_ERR(s)) return PTR_ERR(s); - - error = path_umountat(dfd, s->name, path, flags | LOOKUP_RCU); - if (unlikely(error == -ECHILD)) - error = path_umountat(dfd, s->name, path, flags); - if (unlikely(error == -ESTALE)) - error = path_umountat(dfd, s->name, path, flags | LOOKUP_REVAL); - - if (likely(!error)) - audit_inode(s, path->dentry, 0); - + error = filename_mountpoint(dfd, s, path, flags); putname(s); return error; } +int +kern_path_mountpoint(int dfd, const char *name, struct path *path, + unsigned int flags) +{ + struct filename s = {.name = name}; + return filename_mountpoint(dfd, &s, path, flags); +} +EXPORT_SYMBOL(kern_path_mountpoint); + /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. diff --git a/fs/namespace.c b/fs/namespace.c index fc2b5226278..da5c4948343 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -17,7 +17,7 @@ #include <linux/security.h> #include <linux/idr.h> #include <linux/acct.h> /* acct_auto_close_mnt */ -#include <linux/ramfs.h> /* init_rootfs */ +#include <linux/init.h> /* init_rootfs */ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> @@ -1321,7 +1321,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - retval = user_path_umountat(AT_FDCWD, name, lookup_flags, &path); + retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; mnt = real_mount(path.mnt); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 105a3b080d1..e0a65a9e37e 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -173,8 +173,6 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; if (!nn->rec_file) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 43f42290e5d..0874998a49c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -368,11 +368,8 @@ static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; - struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - if (fp->fi_had_conflict) - return NULL; if (num_delegations > max_delegations) return NULL; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); @@ -389,8 +386,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - get_nfs4_file(fp); - dp->dl_file = fp; + dp->dl_file = NULL; dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; @@ -3035,7 +3031,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp) if (status) { list_del_init(&dp->dl_perclnt); locks_free_lock(fl); - return -ENOMEM; + return status; } fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); @@ -3044,22 +3040,35 @@ static int nfs4_setlease(struct nfs4_delegation *dp) return 0; } -static int nfs4_set_delegation(struct nfs4_delegation *dp) +static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - struct nfs4_file *fp = dp->dl_file; + int status; - if (!fp->fi_lease) - return nfs4_setlease(dp); + if (fp->fi_had_conflict) + return -EAGAIN; + get_nfs4_file(fp); + dp->dl_file = fp; + if (!fp->fi_lease) { + status = nfs4_setlease(dp); + if (status) + goto out_free; + return 0; + } spin_lock(&recall_lock); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); - return -EAGAIN; + status = -EAGAIN; + goto out_free; } atomic_inc(&fp->fi_delegees); list_add(&dp->dl_perfile, &fp->fi_delegations); spin_unlock(&recall_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; +out_free: + put_nfs4_file(fp); + dp->dl_file = fp; + return status; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3134,7 +3143,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp); + status = nfs4_set_delegation(dp, stp->st_file); if (status) goto out_free; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 8a404576fb2..b4f788e0ca3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) return ERR_PTR(-EINVAL); count = size / sizeof(struct posix_acl_entry); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 94417a85ce6..f37d3c0e205 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, out_write_size: pos += copied; - if (pos > inode->i_size) { + if (pos > i_size_read(inode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5c1c864e81c..363f0dcc924 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, struct o2nm_node *node, int idx) { - struct list_head *iter; struct o2hb_callback_func *f; - list_for_each(iter, &hbcall->list) { - f = list_entry(iter, struct o2hb_callback_func, hc_item); + list_for_each_entry(f, &hbcall->list, hc_item) { mlog(ML_HEARTBEAT, "calling funcs %p\n", f); (f->hc_func)(node, idx, f->hc_data); } @@ -641,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, /* Will run the list in order until we process the passed event */ static void o2hb_run_event_list(struct o2hb_node_event *queued_event) { - int empty; struct o2hb_callback *hbcall; struct o2hb_node_event *event; - spin_lock(&o2hb_live_lock); - empty = list_empty(&queued_event->hn_item); - spin_unlock(&o2hb_live_lock); - if (empty) - return; - /* Holding callback sem assures we don't alter the callback * lists when doing this, and serializes ourselves with other * processes wanting callbacks. */ @@ -709,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) struct o2hb_node_event event = { .hn_item = LIST_HEAD_INIT(event.hn_item), }; struct o2nm_node *node; + int queued = 0; node = o2nm_get_node_by_num(slot->ds_node_num); if (!node) @@ -726,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, slot->ds_node_num); + queued = 1; } } spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); o2nm_node_put(node); } @@ -790,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; unsigned int slot_dead_ms; int tmp; + int queued = 0; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); @@ -883,6 +878,7 @@ fire_callbacks: slot->ds_node_num); changed = 1; + queued = 1; } list_add_tail(&slot->ds_live_item, @@ -934,6 +930,7 @@ fire_callbacks: node, slot->ds_node_num); changed = 1; + queued = 1; } /* We don't clear this because the node is still @@ -949,7 +946,8 @@ fire_callbacks: out: spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); if (node) o2nm_node_put(node); @@ -2516,8 +2514,7 @@ unlock: int o2hb_register_callback(const char *region_uuid, struct o2hb_callback_func *hc) { - struct o2hb_callback_func *tmp; - struct list_head *iter; + struct o2hb_callback_func *f; struct o2hb_callback *hbcall; int ret; @@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid, down_write(&o2hb_callback_sem); - list_for_each(iter, &hbcall->list) { - tmp = list_entry(iter, struct o2hb_callback_func, hc_item); - if (hc->hc_priority < tmp->hc_priority) { - list_add_tail(&hc->hc_item, iter); + list_for_each_entry(f, &hbcall->list, hc_item) { + if (hc->hc_priority < f->hc_priority) { + list_add_tail(&hc->hc_item, &f->hc_item); break; } } diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d644dc61142..2cd2406b414 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_NOTICE "o2net: No longer connected to " - SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); + if (old_sc) + printk(KERN_NOTICE "o2net: No longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -765,32 +766,32 @@ static struct o2net_msg_handler * o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, struct rb_node **ret_parent) { - struct rb_node **p = &o2net_handler_tree.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p = &o2net_handler_tree.rb_node; + struct rb_node *parent = NULL; struct o2net_msg_handler *nmh, *ret = NULL; int cmp; - while (*p) { - parent = *p; - nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); + while (*p) { + parent = *p; + nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); cmp = o2net_handler_cmp(nmh, msg_type, key); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else { + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { ret = nmh; - break; + break; } - } + } - if (ret_p != NULL) - *ret_p = p; - if (ret_parent != NULL) - *ret_parent = parent; + if (ret_p != NULL) + *ret_p = p; + if (ret_parent != NULL) + *ret_parent = parent; - return ret; + return ret; } static void o2net_handler_kref_release(struct kref *kref) @@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work) ret = 0; out: - if (ret) { + if (ret && sc) { printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); /* 0 err so that another will be queued and attempted * from set_nn_state */ - if (sc) - o2net_ensure_shutdown(nn, sc, 0); + o2net_ensure_shutdown(nn, sc, 0); } if (sc) sc_put(sc); @@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock) if (o2nm_this_node() >= node->nd_num) { local_node = o2nm_get_node_by_num(o2nm_this_node()); - printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " - "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " - "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, - &(local_node->nd_ipv4_address), - ntohs(local_node->nd_ipv4_port), node->nd_name, - node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + if (local_node) + printk(KERN_NOTICE "o2net: Unexpected connect attempt " + "seen at node '%s' (%u, %pI4:%d) from " + "node '%s' (%u, %pI4:%d)\n", + local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), + node->nd_name, + node->nd_num, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fbec0be6232..b46278f9ae4 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_lock *lock = NULL; struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; - struct list_head *iter, *head=NULL; + struct list_head *head = NULL; __be64 cookie; u32 flags; u8 node; @@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, /* try convert queue for both ast/bast */ head = &res->converting; lock = NULL; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } @@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, else head = &res->granted; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index de854cca12a..e0517762fcc 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request) static inline int dlm_lock_on_list(struct list_head *head, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, head) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, head, list) { if (tmplock == lock) return 1; } diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 29a886d1e82..e36d63ff178 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, int *kick_thread) { enum dlm_status status = DLM_NORMAL; - struct list_head *iter; struct dlm_lock *tmplock=NULL; assert_spin_locked(&res->spinlock); @@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, /* upconvert from here on */ status = DLM_NORMAL; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, &res->granted, list) { if (tmplock == lock) continue; if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; /* existing conversion requests take precedence */ @@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; + struct dlm_lock *tmp_lock; struct dlm_lockstatus *lksb; enum dlm_status status = DLM_NORMAL; u32 flags; @@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, dlm_error(status); goto leave; } - list_for_each(iter, &res->granted) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock->ml.cookie == cnv->cookie && - lock->ml.node == cnv->node_idx) { + list_for_each_entry(tmp_lock, &res->granted, list) { + if (tmp_lock->ml.cookie == cnv->cookie && + tmp_lock->ml.node == cnv->node_idx) { + lock = tmp_lock; dlm_lock_get(lock); break; } - lock = NULL; } spin_unlock(&res->spinlock); if (!lock) { diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0e28e242226..e33cd7a3c58 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock) void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) { - struct list_head *iter2; struct dlm_lock *lock; char buf[DLM_LOCKID_NAME_MAX]; @@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) res->inflight_locks, atomic_read(&res->asts_reserved)); dlm_print_lockres_refmap(res); printk(" granted queue:\n"); - list_for_each(iter2, &res->granted) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { __dlm_print_lock(lock); } printk(" converting queue:\n"); - list_for_each(iter2, &res->converting) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->converting, list) { __dlm_print_lock(lock); } printk(" blocked queue:\n"); - list_for_each(iter2, &res->blocked) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->blocked, list) { __dlm_print_lock(lock); } } @@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) { struct dlm_master_list_entry *mle; struct hlist_head *bucket; - struct hlist_node *list; int i, out = 0; unsigned long total = 0, longest = 0, bucket_count = 0; @@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(mle, bucket, master_hash_node) { ++total; ++bucket_count; if (len - out < 200) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index dbb17c07656..8b3382abf84 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, unsigned int hash) { struct hlist_head *bucket; - struct hlist_node *list; + struct dlm_lock_resource *res; mlog(0, "%.*s\n", len, name); @@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, bucket = dlm_lockres_hash(dlm, hash); - hlist_for_each(list, bucket) { - struct dlm_lock_resource *res = hlist_entry(list, - struct dlm_lock_resource, hash_node); + hlist_for_each_entry(res, bucket, hash_node) { if (res->lockname.name[0] != name[0]) continue; if (unlikely(res->lockname.len != len)) @@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) { - struct dlm_ctxt *tmp = NULL; - struct list_head *iter; + struct dlm_ctxt *tmp; assert_spin_locked(&dlm_domain_lock); /* tmp->name here is always NULL terminated, * but domain may not be! */ - list_for_each(iter, &dlm_domains) { - tmp = list_entry (iter, struct dlm_ctxt, list); + list_for_each_entry(tmp, &dlm_domains, list) { if (strlen(tmp->name) == len && memcmp(tmp->name, domain, len)==0) - break; - tmp = NULL; + return tmp; } - return tmp; + return NULL; } /* For null terminated domain strings ONLY */ @@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm) * you shouldn't trust your pointer. */ struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) { - struct list_head *iter; - struct dlm_ctxt *target = NULL; + struct dlm_ctxt *target; + struct dlm_ctxt *ret = NULL; spin_lock(&dlm_domain_lock); - list_for_each(iter, &dlm_domains) { - target = list_entry (iter, struct dlm_ctxt, list); - + list_for_each_entry(target, &dlm_domains, list) { if (target == dlm) { __dlm_get(target); + ret = target; break; } - - target = NULL; } spin_unlock(&dlm_domain_lock); - return target; + return ret; } int dlm_domain_fully_joined(struct dlm_ctxt *dlm) @@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem); void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, int node_num) { - struct list_head *iter; struct dlm_eviction_cb *cb; down_read(&dlm_callback_sem); - list_for_each(iter, &dlm->dlm_eviction_callbacks) { - cb = list_entry(iter, struct dlm_eviction_cb, ec_item); - + list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { cb->ec_func(node_num, cb->ec_data); } up_read(&dlm_callback_sem); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 47e67c2d228..5d32f7511f7 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void) static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); - + list_for_each_entry(tmplock, &res->granted, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); - + list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; if (!dlm_lock_compatible(tmplock->ml.convert_type, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 33ecbe0e673..cf0f103963b 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, { struct dlm_master_list_entry *tmpmle; struct hlist_head *bucket; - struct hlist_node *list; unsigned int hash; assert_spin_locked(&dlm->master_lock); hash = dlm_lockid_hash(name, namelen); bucket = dlm_master_hash(dlm, hash); - hlist_for_each(list, bucket) { - tmpmle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(tmpmle, bucket, master_hash_node) { if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; struct hlist_head *bucket; - struct hlist_node *list; + struct hlist_node *tmp; unsigned int i; mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); @@ -3194,10 +3191,7 @@ top: spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); @@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) int i; struct hlist_head *bucket; struct dlm_master_list_entry *mle; - struct hlist_node *tmp, *list; + struct hlist_node *tmp; /* * We notified all other nodes that we are exiting the domain and @@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each_safe(list, tmp, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { if (mle->type != DLM_MLE_BLOCK) { mlog(ML_ERROR, "bad mle: %p\n", mle); dlm_print_one_mle(mle); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 773bd32bfd8..0b5adca1b17 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, { struct dlm_lock_request lr; int ret; + int status; mlog(0, "\n"); @@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, // send message ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, - &lr, sizeof(lr), request_from, NULL); + &lr, sizeof(lr), request_from, &status); /* negative status is handled by caller */ if (ret < 0) mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " "to recover dead node %u\n", dlm->name, ret, request_from, dead_node); + else + ret = status; // return from here, then // sleep until all received or error return ret; @@ -2328,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); + } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + if (test_bit(dead_node, res->refmap)) { + mlog(0, "%s:%.*s: dead node %u had a ref, but had " + "no locks and had not purged before dying\n", + dlm->name, res->lockname.len, + res->lockname.name, dead_node); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); + } } spin_unlock(&res->spinlock); } diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index e73c833fc2a..9db869de829 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { struct dlm_lock *lock, *target; - struct list_head *iter; - struct list_head *head; int can_grant = 1; /* @@ -314,9 +312,7 @@ converting: dlm->name, res->lockname.len, res->lockname.name); BUG(); } - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -333,9 +329,8 @@ converting: target->ml.convert_type; } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + + list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -384,9 +379,7 @@ blocked: goto leave; target = list_entry(res->blocked.next, struct dlm_lock, list); - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { @@ -400,9 +393,7 @@ blocked: } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 850aa7e8753..5698b52cf5c 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; enum dlm_status status = DLM_NORMAL; int found = 0, i; @@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, } for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, queue, list) { if (lock->ml.cookie == unlock->cookie && lock->ml.node == unlock->node_idx) { dlm_lock_get(lock); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 12bafb7265c..efa2b3d339e 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); umode_t mode = S_IFDIR | 0755; - struct dlmfs_inode_private *ip; if (inode) { - ip = DLMFS_I(inode); - inode->i_ino = get_next_ino(); inode_init_owner(inode, NULL, mode); inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116d0d3..767370b656c 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; @@ -852,20 +851,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) down_read(&OCFS2_I(inode)->ip_alloc_sem); - if (*offset >= inode->i_size) { + if (*offset >= i_size_read(inode)) { ret = -ENXIO; goto out_unlock; } if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { if (whence == SEEK_HOLE) - *offset = inode->i_size; + *offset = i_size_read(inode); goto out_unlock; } clen = 0; cpos = *offset >> cs_bits; - cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); + cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); while (cpos < cend && !is_last) { ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, @@ -904,8 +903,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) extlen = clen; extlen <<= cs_bits; - if ((extoff + extlen) > inode->i_size) - extlen = inode->i_size - extoff; + if ((extoff + extlen) > i_size_read(inode)) + extlen = i_size_read(inode) - extoff; extoff += extlen; if (extoff > *offset) *offset = extoff; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3261d71319e..4f8197caa48 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -671,11 +671,7 @@ restarted_transaction: } else { BUG_ON(why != RESTART_TRANS); - /* TODO: This can be more intelligent. */ - credits = ocfs2_calc_extend_credits(osb->sb, - &fe->id2.i_list, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); + status = ocfs2_allocate_extend_trans(handle, 1); if (status < 0) { /* handle still has to be committed at * this point. */ @@ -1800,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); out: + ocfs2_free_path(path); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 0c60ef2d805..fa32ce9b455 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, if (o2info_from_user(oij, req)) goto bail; - oij.ij_journal_size = osb->journal->j_inode->i_size; + oij.ij_journal_size = i_size_read(osb->journal->j_inode); o2info_set_request_filled(&oij.ij_req); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 242170d8397..44fc3e530c3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -455,6 +455,41 @@ bail: return status; } +/* + * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. + * If that fails, restart the transaction & regain write access for the + * buffer head which is used for metadata modifications. + * Taken from Ext4: extend_or_restart_transaction() + */ +int ocfs2_allocate_extend_trans(handle_t *handle, int thresh) +{ + int status, old_nblks; + + BUG_ON(!handle); + + old_nblks = handle->h_buffer_credits; + trace_ocfs2_allocate_extend_trans(old_nblks, thresh); + + if (old_nblks < thresh) + return 0; + + status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + if (status > 0) { + status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) + mlog_errno(status); + } + +bail: + return status; +} + + struct ocfs2_triggers { struct jbd2_buffer_trigger_type ot_triggers; int ot_offset; @@ -801,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) inode_lock = 1; di = (struct ocfs2_dinode *)bh->b_data; - if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { + if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", - inode->i_size); + i_size_read(inode)); status = -EINVAL; goto done; } - trace_ocfs2_journal_init(inode->i_size, + trace_ocfs2_journal_init(i_size_read(inode), (unsigned long long)inode->i_blocks, OCFS2_I(inode)->ip_clusters); @@ -1096,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode) memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); - num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); + num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0a992737dca..0b479bab367 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -258,6 +258,17 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle); int ocfs2_extend_trans(handle_t *handle, int nblocks); +int ocfs2_allocate_extend_trans(handle_t *handle, + int thresh); + +/* + * Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * fallocate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. + */ +#define OCFS2_MAX_TRANS_DATA 64U /* * Create access is for when we get a newly created buffer and we're diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index aebeacd807c..cd5496b7a0a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, } retry_enospc: - (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; + (*ac)->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == @@ -1154,7 +1154,7 @@ retry_enospc: OCFS2_LA_DISABLED) goto bail; - ac->ac_bits_wanted = osb->local_alloc_default_bits; + ac->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, &cluster_off, diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 452068b4574..3d3f3c83065 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle, } out: + ocfs2_free_path(path); return ret; } @@ -845,7 +846,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, struct ocfs2_move_extents *range = context->range; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if ((inode->i_size == 0) || (range->me_len == 0)) + if ((i_size_read(inode) == 0) || (range->me_len == 0)) return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 3b481f49063..1b60c62aa9d 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); +DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); + DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 332a281f217..aaa50611ec6 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; } - if (gqinode->i_size < off + len) { + if (i_size_read(gqinode) < off + len) { loff_t rounded_end = ocfs2_align_bytes_to_blocks(sb, off + len); @@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) */ WARN_ON(journal_current_handle()); status = ocfs2_extend_no_holes(gqinode, NULL, - gqinode->i_size + (need_alloc << sb->s_blocksize_bits), - gqinode->i_size); + i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits), + i_size_read(gqinode)); if (status < 0) goto out_dq; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee4874..2e4344be3b9 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + 2 * sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + 2 * sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + 2 * sb->s_blocksize); + i_size_read(lqinode) + 2 * sb->s_blocksize); if (status < 0) { mlog_errno(status); goto out; @@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + sb->s_blocksize); + i_size_read(lqinode) + sb->s_blocksize); if (status < 0) { mlog_errno(status); goto out; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a70d604593b..bf4dfc14bb2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, while (cpos < clusters) { ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto unlock; + } if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { ret = ocfs2_add_refcount_flag(inode, &di_et, &ref_tree->rf_ci, @@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode, while (cpos < clusters) { ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto out; + } if (p_cluster) { ret = ocfs2_add_refcounted_extent(t_inode, &et, ref_ci, ref_root_bh, diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 317ef0abccb..6ce0686eab7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode, int ret, credits, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; - struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; struct ocfs2_xattr_info xi = { @@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode, if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); - goto cleanup; + goto out_free_ac; } ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); ocfs2_commit_trans(osb, ctxt.handle); +out_free_ac: if (ctxt.data_ac) ocfs2_free_alloc_context(ctxt.data_ac); if (ctxt.meta_ac) @@ -5881,6 +5882,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode, while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, el, &ext_flags); + if (ret) { + mlog_errno(ret); + break; + } cpos += num_clusters; if ((ext_flags & OCFS2_EXT_REFCOUNTED)) @@ -6797,7 +6802,7 @@ out: if (ret) { if (*meta_ac) { ocfs2_free_alloc_context(*meta_ac); - meta_ac = NULL; + *meta_ac = NULL; } } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 0ff80f9b930..985ea881b5b 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -286,7 +286,7 @@ int proc_fd_permission(struct inode *inode, int mask) int rv = generic_permission(inode, mask); if (rv == 0) return 0; - if (task_pid(current) == proc_pid(inode)) + if (task_tgid(current) == proc_pid(inode)) rv = 0; return rv; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 107d026f5d6..7366e9d63ce 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_file_clear_soft_dirty(ptent); } + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma->vm_flags & VM_SOFTDIRTY) + flags2 |= __PM_SOFT_DIRTY; + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; - if (pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); @@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2)); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, @@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { int pmd_flags2; - pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); + if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) + pmd_flags2 = __PM_SOFT_DIRTY; + else + pmd_flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; @@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; for (; addr != end; addr += PAGE_SIZE) { + int flags2; /* check to see if we've left 'vma' behind * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); } /* check that 'vma' actually covers this address, @@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, #ifdef CONFIG_HUGETLB_PAGE static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, - pte_t pte, int offset) + pte_t pte, int offset, int flags2) { if (pte_present(pte)) - *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_STATUS2(pm->v2, 0) | PM_PRESENT); + *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | + PM_STATUS2(pm->v2, flags2) | + PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | + PM_STATUS2(pm->v2, flags2)); } /* This function walks within one hugetlb entry in the single call */ @@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { struct pagemapread *pm = walk->private; + struct vm_area_struct *vma; int err = 0; + int flags2; pagemap_entry_t pme; + vma = find_vma(walk->mm, addr); + WARN_ON_ONCE(!vma); + + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1376,8 +1402,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); + n = mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); + if (n < 0) + return n; seq_printf(m, "%08lx %s", vma->vm_start, buffer); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a1a16eb97c7..9100d695988 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -21,6 +21,7 @@ #include <linux/crash_dump.h> #include <linux/list.h> #include <linux/vmalloc.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <asm/io.h> #include "internal.h" @@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count, return read; } +/* + * Architectures may override this function to allocate ELF header in 2nd kernel + */ +int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + return 0; +} + +/* + * Architectures may override this function to free header + */ +void __weak elfcorehdr_free(unsigned long long addr) +{} + +/* + * Architectures may override this function to read from ELF header + */ +ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to read from notes sections + */ +ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to map oldmem + */ +int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Copy to either kernel or user space + */ +static int copy_to(void *target, void *src, size_t size, int userbuf) +{ + if (userbuf) { + if (copy_to_user((char __user *) target, src, size)) + return -EFAULT; + } else { + memcpy(target, src, size); + } + return 0; +} + /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ -static ssize_t read_vmcore(struct file *file, char __user *buffer, - size_t buflen, loff_t *fpos) +static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, + int userbuf) { ssize_t acc = 0, tmp; size_t tsz; @@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); - if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) + if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; - if (copy_to_user(buffer, kaddr, tsz)) + if (copy_to(buffer, kaddr, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, if (*fpos < m->offset + m->size) { tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); start = m->paddr + *fpos - m->offset; - tmp = read_from_oldmem(buffer, tsz, &start, 1); + tmp = read_from_oldmem(buffer, tsz, &start, userbuf); if (tmp < 0) return tmp; buflen -= tsz; @@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } +static ssize_t read_vmcore(struct file *file, char __user *buffer, + size_t buflen, loff_t *fpos) +{ + return __read_vmcore((__force char *) buffer, buflen, fpos, 1); +} + +/* + * The vmcore fault handler uses the page cache and fills data using the + * standard __vmcore_read() function. + * + * On s390 the fault handler is used for memory regions that can't be mapped + * directly with remap_pfn_range(). + */ +static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#ifdef CONFIG_S390 + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t index = vmf->pgoff; + struct page *page; + loff_t offset; + char *buf; + int rc; + + page = find_or_create_page(mapping, index, GFP_KERNEL); + if (!page) + return VM_FAULT_OOM; + if (!PageUptodate(page)) { + offset = (loff_t) index << PAGE_CACHE_SHIFT; + buf = __va((page_to_pfn(page) << PAGE_SHIFT)); + rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); + if (rc < 0) { + unlock_page(page); + page_cache_release(page); + return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + } + SetPageUptodate(page); + } + unlock_page(page); + vmf->page = page; + return 0; +#else + return VM_FAULT_SIGBUS; +#endif +} + +static const struct vm_operations_struct vmcore_mmap_ops = { + .fault = mmap_vmcore_fault, +}; + /** * alloc_elfnotes_buf - allocate buffer for ELF note segment in * vmalloc memory @@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) * regions in the 1st kernel pointed to by PT_LOAD entries) into * virtually contiguous user-space in ELF layout. */ -#if defined(CONFIG_MMU) && !defined(CONFIG_S390) +#ifdef CONFIG_MMU static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; @@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); vma->vm_flags |= VM_MIXEDMAP; + vma->vm_ops = &vmcore_mmap_ops; len = 0; @@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min_t(size_t, m->offset + m->size - start, size); paddr = m->paddr + start - m->offset; - if (remap_pfn_range(vma, vma->vm_start + len, - paddr >> PAGE_SHIFT, tsz, - vma->vm_page_prot)) + if (remap_oldmem_pfn_range(vma, vma->vm_start + len, + paddr >> PAGE_SHIFT, tsz, + vma->vm_page_prot)) goto fail; size -= tsz; start += tsz; @@ -357,7 +462,7 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; @@ -444,7 +549,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -536,7 +642,7 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; @@ -623,7 +729,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -810,7 +917,7 @@ static int __init parse_crash_elf64_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr); if (rc < 0) return rc; @@ -837,7 +944,7 @@ static int __init parse_crash_elf64_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -866,7 +973,7 @@ static int __init parse_crash_elf32_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr); if (rc < 0) return rc; @@ -892,7 +999,7 @@ static int __init parse_crash_elf32_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -919,7 +1026,7 @@ static int __init parse_crash_elf_headers(void) int rc=0; addr = elfcorehdr_addr; - rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); + rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr); if (rc < 0) return rc; if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { @@ -952,7 +1059,14 @@ static int __init vmcore_init(void) { int rc = 0; - /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ + /* Allow architectures to allocate ELF header in 2nd kernel */ + rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size); + if (rc) + return rc; + /* + * If elfcorehdr= has been passed in cmdline or created in 2nd kernel, + * then capture the dump. + */ if (!(is_vmcore_usable())) return rc; rc = parse_crash_elf_headers(); @@ -960,6 +1074,8 @@ static int __init vmcore_init(void) pr_warn("Kdump: vmcore not initialized\n"); return rc; } + elfcorehdr_free(elfcorehdr_addr); + elfcorehdr_addr = ELFCORE_ADDR_ERR; proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); if (proc_vmcore) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index c24f1e10b94..39d14659a8d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -244,12 +244,6 @@ struct dentry *ramfs_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, data, ramfs_fill_super); } -static struct dentry *rootfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); -} - static void ramfs_kill_sb(struct super_block *sb) { kfree(sb->s_fs_info); @@ -262,29 +256,23 @@ static struct file_system_type ramfs_fs_type = { .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; -static struct file_system_type rootfs_fs_type = { - .name = "rootfs", - .mount = rootfs_mount, - .kill_sb = kill_litter_super, -}; -static int __init init_ramfs_fs(void) -{ - return register_filesystem(&ramfs_fs_type); -} -module_init(init_ramfs_fs) - -int __init init_rootfs(void) +int __init init_ramfs_fs(void) { + static unsigned long once; int err; + if (test_and_set_bit(0, &once)) + return 0; + err = bdi_init(&ramfs_backing_dev_info); if (err) return err; - err = register_filesystem(&rootfs_fs_type); + err = register_filesystem(&ramfs_fs_type); if (err) bdi_destroy(&ramfs_backing_dev_info); return err; } +module_init(init_ramfs_fs) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index fb50652e4e1..41d108ecc9b 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,17 +167,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, /* * Block is uncompressed. */ - int i, in, pg_offset = 0; - - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - } + int in, pg_offset = 0; for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { page++; diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index f7f527bf8c1..d8c2d747be2 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb, { struct squashfs_sb_info *msblk = sb->s_fs_info; int err, i, index, length = 0; + unsigned int size; struct squashfs_dir_index dir_index; TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n", @@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb, */ break; + size = le32_to_cpu(dir_index.size) + 1; + + /* size should never be larger than SQUASHFS_NAME_LEN */ + if (size > SQUASHFS_NAME_LEN) + break; + err = squashfs_read_metadata(sb, NULL, &index_start, - &index_offset, le32_to_cpu(dir_index.size) + 1); + &index_offset, size); if (err < 0) break; @@ -105,9 +112,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; - int offset = squashfs_i(inode)->offset, length, dir_count, size, - type, err; - unsigned int inode_number; + int offset = squashfs_i(inode)->offset, length, err; + unsigned int inode_number, dir_count, size, type; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; @@ -200,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) ((short) le16_to_cpu(dire->inode_number)); type = le16_to_cpu(dire->type); + if (type > SQUASHFS_MAX_DIR_TYPE) + goto failed_read; + if (!dir_emit(ctx, dire->name, size, inode_number, squashfs_filetype_table[type])) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 7834a517f7f..67cad77fefb 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb, int len) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int i, size, length = 0, err; + int i, length = 0, err; + unsigned int size; struct squashfs_dir_index *index; char *str; @@ -103,6 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb, size = le32_to_cpu(index->size) + 1; + if (size > SQUASHFS_NAME_LEN) + break; err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); @@ -144,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, struct squashfs_dir_entry *dire; u64 block = squashfs_i(dir)->start + msblk->directory_table; int offset = squashfs_i(dir)->offset; - int err, length, dir_count, size; + int err, length; + unsigned int dir_count, size; TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 9e2349d07cb..4b2beda4949 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -87,7 +87,7 @@ #define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \ SQUASHFS_COMP_OPT) -/* Max number of types and file types */ +/* Inode types including extended types */ #define SQUASHFS_DIR_TYPE 1 #define SQUASHFS_REG_TYPE 2 #define SQUASHFS_SYMLINK_TYPE 3 @@ -103,6 +103,9 @@ #define SQUASHFS_LFIFO_TYPE 13 #define SQUASHFS_LSOCKET_TYPE 14 +/* Max type value stored in directory entry */ +#define SQUASHFS_MAX_DIR_TYPE 7 + /* Xattr types */ #define SQUASHFS_XATTR_USER 0 #define SQUASHFS_XATTR_TRUSTED 1 diff --git a/fs/super.c b/fs/super.c index 5536a95186e..f6961ea84c5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -71,7 +71,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) if (!grab_super_passive(sb)) return -1; - if (sb->s_op && sb->s_op->nr_cached_objects) + if (sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb); total_objects = sb->s_nr_dentry_unused + |