diff options
Diffstat (limited to 'kernel')
35 files changed, 1302 insertions, 833 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index c53e491e25a..0b72d1a74be 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -70,10 +70,11 @@ obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o obj-$(CONFIG_SMP) += stop_machine.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o -obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o +obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o -obj-$(CONFIG_GCOV_KERNEL) += gcov/ +obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o +obj-$(CONFIG_GCOV_KERNEL) += gcov/ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o diff --git a/kernel/acct.c b/kernel/acct.c index 385b88461c2..fa7eb3de2dd 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -122,7 +122,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) spin_unlock(&acct_lock); /* May block */ - if (vfs_statfs(file->f_path.dentry, &sbuf)) + if (vfs_statfs(&file->f_path, &sbuf)) return res; suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; diff --git a/kernel/audit.c b/kernel/audit.c index 8296aa516c5..d96045789b5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -56,7 +56,6 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/netlink.h> -#include <linux/inotify.h> #include <linux/freezer.h> #include <linux/tty.h> diff --git a/kernel/audit.h b/kernel/audit.h index 208687be4f3..f7206db4e13 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -103,21 +103,27 @@ extern struct mutex audit_filter_mutex; extern void audit_free_rule_rcu(struct rcu_head *); extern struct list_head audit_filter_list[]; +extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); + /* audit watch functions */ -extern unsigned long audit_watch_inode(struct audit_watch *watch); -extern dev_t audit_watch_dev(struct audit_watch *watch); +#ifdef CONFIG_AUDIT_WATCH extern void audit_put_watch(struct audit_watch *watch); extern void audit_get_watch(struct audit_watch *watch); extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op); -extern int audit_add_watch(struct audit_krule *krule); -extern void audit_remove_watch(struct audit_watch *watch); -extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list); -extern void audit_inotify_unregister(struct list_head *in_list); +extern int audit_add_watch(struct audit_krule *krule, struct list_head **list); +extern void audit_remove_watch_rule(struct audit_krule *krule); extern char *audit_watch_path(struct audit_watch *watch); -extern struct list_head *audit_watch_rules(struct audit_watch *watch); - -extern struct audit_entry *audit_dupe_rule(struct audit_krule *old, - struct audit_watch *watch); +extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev); +#else +#define audit_put_watch(w) {} +#define audit_get_watch(w) {} +#define audit_to_watch(k, p, l, o) (-EINVAL) +#define audit_add_watch(k, l) (-EINVAL) +#define audit_remove_watch_rule(k) BUG() +#define audit_watch_path(w) "" +#define audit_watch_compare(w, i, d) 0 + +#endif /* CONFIG_AUDIT_WATCH */ #ifdef CONFIG_AUDIT_TREE extern struct audit_chunk *audit_tree_lookup(const struct inode *); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 46a57b57a33..7f18d3a4527 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1,5 +1,5 @@ #include "audit.h" -#include <linux/inotify.h> +#include <linux/fsnotify_backend.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/kthread.h> @@ -22,7 +22,7 @@ struct audit_tree { struct audit_chunk { struct list_head hash; - struct inotify_watch watch; + struct fsnotify_mark mark; struct list_head trees; /* with root here */ int dead; int count; @@ -59,7 +59,7 @@ static LIST_HEAD(prune_list); * tree is refcounted; one reference for "some rules on rules_list refer to * it", one for each chunk with pointer to it. * - * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount + * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount * of watch contributes 1 to .refs). * * node.index allows to get from node.list to containing chunk. @@ -68,7 +68,7 @@ static LIST_HEAD(prune_list); * that makes a difference. Some. */ -static struct inotify_handle *rtree_ih; +static struct fsnotify_group *audit_tree_group; static struct audit_tree *alloc_tree(const char *s) { @@ -111,29 +111,6 @@ const char *audit_tree_path(struct audit_tree *tree) return tree->pathname; } -static struct audit_chunk *alloc_chunk(int count) -{ - struct audit_chunk *chunk; - size_t size; - int i; - - size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node); - chunk = kzalloc(size, GFP_KERNEL); - if (!chunk) - return NULL; - - INIT_LIST_HEAD(&chunk->hash); - INIT_LIST_HEAD(&chunk->trees); - chunk->count = count; - atomic_long_set(&chunk->refs, 1); - for (i = 0; i < count; i++) { - INIT_LIST_HEAD(&chunk->owners[i].list); - chunk->owners[i].index = i; - } - inotify_init_watch(&chunk->watch); - return chunk; -} - static void free_chunk(struct audit_chunk *chunk) { int i; @@ -157,6 +134,35 @@ static void __put_chunk(struct rcu_head *rcu) audit_put_chunk(chunk); } +static void audit_tree_destroy_watch(struct fsnotify_mark *entry) +{ + struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); + call_rcu(&chunk->head, __put_chunk); +} + +static struct audit_chunk *alloc_chunk(int count) +{ + struct audit_chunk *chunk; + size_t size; + int i; + + size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node); + chunk = kzalloc(size, GFP_KERNEL); + if (!chunk) + return NULL; + + INIT_LIST_HEAD(&chunk->hash); + INIT_LIST_HEAD(&chunk->trees); + chunk->count = count; + atomic_long_set(&chunk->refs, 1); + for (i = 0; i < count; i++) { + INIT_LIST_HEAD(&chunk->owners[i].list); + chunk->owners[i].index = i; + } + fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); + return chunk; +} + enum {HASH_SIZE = 128}; static struct list_head chunk_hash_heads[HASH_SIZE]; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock); @@ -167,10 +173,15 @@ static inline struct list_head *chunk_hash(const struct inode *inode) return chunk_hash_heads + n % HASH_SIZE; } -/* hash_lock is held by caller */ +/* hash_lock & entry->lock is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - struct list_head *list = chunk_hash(chunk->watch.inode); + struct fsnotify_mark *entry = &chunk->mark; + struct list_head *list; + + if (!entry->i.inode) + return; + list = chunk_hash(entry->i.inode); list_add_rcu(&chunk->hash, list); } @@ -181,7 +192,8 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode) struct audit_chunk *p; list_for_each_entry_rcu(p, list, hash) { - if (p->watch.inode == inode) { + /* mark.inode may have gone NULL, but who cares? */ + if (p->mark.i.inode == inode) { atomic_long_inc(&p->refs); return p; } @@ -210,38 +222,19 @@ static struct audit_chunk *find_chunk(struct node *p) static void untag_chunk(struct node *p) { struct audit_chunk *chunk = find_chunk(p); + struct fsnotify_mark *entry = &chunk->mark; struct audit_chunk *new; struct audit_tree *owner; int size = chunk->count - 1; int i, j; - if (!pin_inotify_watch(&chunk->watch)) { - /* - * Filesystem is shutting down; all watches are getting - * evicted, just take it off the node list for this - * tree and let the eviction logics take care of the - * rest. - */ - owner = p->owner; - if (owner->root == chunk) { - list_del_init(&owner->same_root); - owner->root = NULL; - } - list_del_init(&p->list); - p->owner = NULL; - put_tree(owner); - return; - } + fsnotify_get_mark(entry); spin_unlock(&hash_lock); - /* - * pin_inotify_watch() succeeded, so the watch won't go away - * from under us. - */ - mutex_lock(&chunk->watch.inode->inotify_mutex); - if (chunk->dead) { - mutex_unlock(&chunk->watch.inode->inotify_mutex); + spin_lock(&entry->lock); + if (chunk->dead || !entry->i.inode) { + spin_unlock(&entry->lock); goto out; } @@ -256,16 +249,17 @@ static void untag_chunk(struct node *p) list_del_init(&p->list); list_del_rcu(&chunk->hash); spin_unlock(&hash_lock); - inotify_evict_watch(&chunk->watch); - mutex_unlock(&chunk->watch.inode->inotify_mutex); - put_inotify_watch(&chunk->watch); + spin_unlock(&entry->lock); + fsnotify_destroy_mark(entry); + fsnotify_put_mark(entry); goto out; } new = alloc_chunk(size); if (!new) goto Fallback; - if (inotify_clone_watch(&chunk->watch, &new->watch) < 0) { + fsnotify_duplicate_mark(&new->mark, entry); + if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { free_chunk(new); goto Fallback; } @@ -298,9 +292,9 @@ static void untag_chunk(struct node *p) list_for_each_entry(owner, &new->trees, same_root) owner->root = new; spin_unlock(&hash_lock); - inotify_evict_watch(&chunk->watch); - mutex_unlock(&chunk->watch.inode->inotify_mutex); - put_inotify_watch(&chunk->watch); + spin_unlock(&entry->lock); + fsnotify_destroy_mark(entry); + fsnotify_put_mark(entry); goto out; Fallback: @@ -314,31 +308,33 @@ Fallback: p->owner = NULL; put_tree(owner); spin_unlock(&hash_lock); - mutex_unlock(&chunk->watch.inode->inotify_mutex); + spin_unlock(&entry->lock); out: - unpin_inotify_watch(&chunk->watch); + fsnotify_put_mark(entry); spin_lock(&hash_lock); } static int create_chunk(struct inode *inode, struct audit_tree *tree) { + struct fsnotify_mark *entry; struct audit_chunk *chunk = alloc_chunk(1); if (!chunk) return -ENOMEM; - if (inotify_add_watch(rtree_ih, &chunk->watch, inode, IN_IGNORED | IN_DELETE_SELF) < 0) { + entry = &chunk->mark; + if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { free_chunk(chunk); return -ENOSPC; } - mutex_lock(&inode->inotify_mutex); + spin_lock(&entry->lock); spin_lock(&hash_lock); if (tree->goner) { spin_unlock(&hash_lock); chunk->dead = 1; - inotify_evict_watch(&chunk->watch); - mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&chunk->watch); + spin_unlock(&entry->lock); + fsnotify_destroy_mark(entry); + fsnotify_put_mark(entry); return 0; } chunk->owners[0].index = (1U << 31); @@ -351,30 +347,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) } insert_hash(chunk); spin_unlock(&hash_lock); - mutex_unlock(&inode->inotify_mutex); + spin_unlock(&entry->lock); return 0; } /* the first tagged inode becomes root of tree */ static int tag_chunk(struct inode *inode, struct audit_tree *tree) { - struct inotify_watch *watch; + struct fsnotify_mark *old_entry, *chunk_entry; struct audit_tree *owner; struct audit_chunk *chunk, *old; struct node *p; int n; - if (inotify_find_watch(rtree_ih, inode, &watch) < 0) + old_entry = fsnotify_find_inode_mark(audit_tree_group, inode); + if (!old_entry) return create_chunk(inode, tree); - old = container_of(watch, struct audit_chunk, watch); + old = container_of(old_entry, struct audit_chunk, mark); /* are we already there? */ spin_lock(&hash_lock); for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - put_inotify_watch(&old->watch); + fsnotify_put_mark(old_entry); return 0; } } @@ -382,25 +379,44 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk = alloc_chunk(old->count + 1); if (!chunk) { - put_inotify_watch(&old->watch); + fsnotify_put_mark(old_entry); return -ENOMEM; } - mutex_lock(&inode->inotify_mutex); - if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { - mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); + chunk_entry = &chunk->mark; + + spin_lock(&old_entry->lock); + if (!old_entry->i.inode) { + /* old_entry is being shot, lets just lie */ + spin_unlock(&old_entry->lock); + fsnotify_put_mark(old_entry); free_chunk(chunk); + return -ENOENT; + } + + fsnotify_duplicate_mark(chunk_entry, old_entry); + if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { + spin_unlock(&old_entry->lock); + free_chunk(chunk); + fsnotify_put_mark(old_entry); return -ENOSPC; } + + /* even though we hold old_entry->lock, this is safe since chunk_entry->lock could NEVER have been grabbed before */ + spin_lock(&chunk_entry->lock); spin_lock(&hash_lock); + + /* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */ if (tree->goner) { spin_unlock(&hash_lock); chunk->dead = 1; - inotify_evict_watch(&chunk->watch); - mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); - put_inotify_watch(&chunk->watch); + spin_unlock(&chunk_entry->lock); + spin_unlock(&old_entry->lock); + + fsnotify_destroy_mark(chunk_entry); + + fsnotify_put_mark(chunk_entry); + fsnotify_put_mark(old_entry); return 0; } list_replace_init(&old->trees, &chunk->trees); @@ -426,10 +442,11 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) list_add(&tree->same_root, &chunk->trees); } spin_unlock(&hash_lock); - inotify_evict_watch(&old->watch); - mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ - put_inotify_watch(&old->watch); /* and kill it */ + spin_unlock(&chunk_entry->lock); + spin_unlock(&old_entry->lock); + fsnotify_destroy_mark(old_entry); + fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ + fsnotify_put_mark(old_entry); /* and kill it */ return 0; } @@ -584,7 +601,9 @@ void audit_trim_trees(void) spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) { - struct inode *inode = find_chunk(node)->watch.inode; + struct audit_chunk *chunk = find_chunk(node); + /* this could be NULL if the watch is dieing else where... */ + struct inode *inode = chunk->mark.i.inode; node->index |= 1U<<31; if (iterate_mounts(compare_root, inode, root_mnt)) node->index &= ~(1U<<31); @@ -846,7 +865,6 @@ void audit_kill_trees(struct list_head *list) * Here comes the stuff asynchronous to auditctl operations */ -/* inode->inotify_mutex is locked */ static void evict_chunk(struct audit_chunk *chunk) { struct audit_tree *owner; @@ -885,35 +903,46 @@ static void evict_chunk(struct audit_chunk *chunk) mutex_unlock(&audit_filter_mutex); } -static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask, - u32 cookie, const char *dname, struct inode *inode) +static int audit_tree_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmonut_mark, + struct fsnotify_event *event) +{ + BUG(); + return -EOPNOTSUPP; +} + +static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group) { - struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); + struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); - if (mask & IN_IGNORED) { - evict_chunk(chunk); - put_inotify_watch(watch); - } + evict_chunk(chunk); + fsnotify_put_mark(entry); } -static void destroy_watch(struct inotify_watch *watch) +static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + __u32 mask, void *data, int data_type) { - struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); - call_rcu(&chunk->head, __put_chunk); + return false; } -static const struct inotify_operations rtree_inotify_ops = { - .handle_event = handle_event, - .destroy_watch = destroy_watch, +static const struct fsnotify_ops audit_tree_ops = { + .handle_event = audit_tree_handle_event, + .should_send_event = audit_tree_send_event, + .free_group_priv = NULL, + .free_event_priv = NULL, + .freeing_mark = audit_tree_freeing_mark, }; static int __init audit_tree_init(void) { int i; - rtree_ih = inotify_init(&rtree_inotify_ops); - if (IS_ERR(rtree_ih)) - audit_panic("cannot initialize inotify handle for rectree watches"); + audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); + if (IS_ERR(audit_tree_group)) + audit_panic("cannot initialize fsnotify group for rectree watches"); for (i = 0; i < HASH_SIZE; i++) INIT_LIST_HEAD(&chunk_hash_heads[i]); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 8df43696f4b..f0c9b2e7542 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -24,18 +24,18 @@ #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/fs.h> +#include <linux/fsnotify_backend.h> #include <linux/namei.h> #include <linux/netlink.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/inotify.h> #include <linux/security.h> #include "audit.h" /* * Reference counting: * - * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED + * audit_parent: lifetime is from audit_init_parent() to receipt of an FS_IGNORED * event. Each audit_watch holds a reference to its associated parent. * * audit_watch: if added to lists, lifetime is from audit_init_watch() to @@ -51,40 +51,61 @@ struct audit_watch { unsigned long ino; /* associated inode number */ struct audit_parent *parent; /* associated parent */ struct list_head wlist; /* entry in parent->watches list */ - struct list_head rules; /* associated rules */ + struct list_head rules; /* anchor for krule->rlist */ }; struct audit_parent { - struct list_head ilist; /* entry in inotify registration list */ - struct list_head watches; /* associated watches */ - struct inotify_watch wdata; /* inotify watch data */ - unsigned flags; /* status flags */ + struct list_head watches; /* anchor for audit_watch->wlist */ + struct fsnotify_mark mark; /* fsnotify mark on the inode */ }; -/* Inotify handle. */ -struct inotify_handle *audit_ih; +/* fsnotify handle. */ +struct fsnotify_group *audit_watch_group; -/* - * audit_parent status flags: - * - * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to - * a filesystem event to ensure we're adding audit watches to a valid parent. - * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot - * receive them while we have nameidata, but must be used for IN_MOVE_SELF which - * we can receive while holding nameidata. - */ -#define AUDIT_PARENT_INVALID 0x001 +/* fsnotify events we care about. */ +#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ + FS_MOVE_SELF | FS_EVENT_ON_CHILD) -/* Inotify events we care about. */ -#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF +static void audit_free_parent(struct audit_parent *parent) +{ + WARN_ON(!list_empty(&parent->watches)); + kfree(parent); +} -static void audit_free_parent(struct inotify_watch *i_watch) +static void audit_watch_free_mark(struct fsnotify_mark *entry) { struct audit_parent *parent; - parent = container_of(i_watch, struct audit_parent, wdata); - WARN_ON(!list_empty(&parent->watches)); - kfree(parent); + parent = container_of(entry, struct audit_parent, mark); + audit_free_parent(parent); +} + +static void audit_get_parent(struct audit_parent *parent) +{ + if (likely(parent)) + fsnotify_get_mark(&parent->mark); +} + +static void audit_put_parent(struct audit_parent *parent) +{ + if (likely(parent)) + fsnotify_put_mark(&parent->mark); +} + +/* + * Find and return the audit_parent on the given inode. If found a reference + * is taken on this parent. + */ +static inline struct audit_parent *audit_find_parent(struct inode *inode) +{ + struct audit_parent *parent = NULL; + struct fsnotify_mark *entry; + + entry = fsnotify_find_inode_mark(audit_watch_group, inode); + if (entry) + parent = container_of(entry, struct audit_parent, mark); + + return parent; } void audit_get_watch(struct audit_watch *watch) @@ -105,7 +126,7 @@ void audit_put_watch(struct audit_watch *watch) void audit_remove_watch(struct audit_watch *watch) { list_del(&watch->wlist); - put_inotify_watch(&watch->parent->wdata); + audit_put_parent(watch->parent); watch->parent = NULL; audit_put_watch(watch); /* match initial get */ } @@ -115,42 +136,32 @@ char *audit_watch_path(struct audit_watch *watch) return watch->path; } -struct list_head *audit_watch_rules(struct audit_watch *watch) -{ - return &watch->rules; -} - -unsigned long audit_watch_inode(struct audit_watch *watch) +int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) { - return watch->ino; -} - -dev_t audit_watch_dev(struct audit_watch *watch) -{ - return watch->dev; + return (watch->ino != (unsigned long)-1) && + (watch->ino == ino) && + (watch->dev == dev); } /* Initialize a parent watch entry. */ static struct audit_parent *audit_init_parent(struct nameidata *ndp) { + struct inode *inode = ndp->path.dentry->d_inode; struct audit_parent *parent; - s32 wd; + int ret; parent = kzalloc(sizeof(*parent), GFP_KERNEL); if (unlikely(!parent)) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&parent->watches); - parent->flags = 0; - - inotify_init_watch(&parent->wdata); - /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */ - get_inotify_watch(&parent->wdata); - wd = inotify_add_watch(audit_ih, &parent->wdata, - ndp->path.dentry->d_inode, AUDIT_IN_WATCH); - if (wd < 0) { - audit_free_parent(&parent->wdata); - return ERR_PTR(wd); + + fsnotify_init_mark(&parent->mark, audit_watch_free_mark); + parent->mark.mask = AUDIT_FS_WATCH; + ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, NULL, 0); + if (ret < 0) { + audit_free_parent(parent); + return ERR_PTR(ret); } return parent; @@ -179,7 +190,7 @@ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op) { struct audit_watch *watch; - if (!audit_ih) + if (!audit_watch_group) return -EOPNOTSUPP; if (path[0] != '/' || path[len-1] == '/' || @@ -217,7 +228,7 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old) new->dev = old->dev; new->ino = old->ino; - get_inotify_watch(&old->parent->wdata); + audit_get_parent(old->parent); new->parent = old->parent; out: @@ -251,15 +262,19 @@ static void audit_update_watch(struct audit_parent *parent, struct audit_entry *oentry, *nentry; mutex_lock(&audit_filter_mutex); + /* Run all of the watches on this parent looking for the one that + * matches the given dname */ list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { if (audit_compare_dname_path(dname, owatch->path, NULL)) continue; /* If the update involves invalidating rules, do the inode-based * filtering now, so we don't omit records. */ - if (invalidating && current->audit_context) + if (invalidating && !audit_dummy_context()) audit_filter_inodes(current, current->audit_context); + /* updating ino will likely change which audit_hash_list we + * are on so we need a new watch for the new list */ nwatch = audit_dupe_watch(owatch); if (IS_ERR(nwatch)) { mutex_unlock(&audit_filter_mutex); @@ -275,12 +290,21 @@ static void audit_update_watch(struct audit_parent *parent, list_del(&oentry->rule.rlist); list_del_rcu(&oentry->list); - nentry = audit_dupe_rule(&oentry->rule, nwatch); + nentry = audit_dupe_rule(&oentry->rule); if (IS_ERR(nentry)) { list_del(&oentry->rule.list); audit_panic("error updating watch, removing"); } else { int h = audit_hash_ino((u32)ino); + + /* + * nentry->rule.watch == oentry->rule.watch so + * we must drop that reference and set it to our + * new watch. + */ + audit_put_watch(nentry->rule.watch); + audit_get_watch(nwatch); + nentry->rule.watch = nwatch; list_add(&nentry->rule.rlist, &nwatch->rules); list_add_rcu(&nentry->list, &audit_inode_hash[h]); list_replace(&oentry->rule.list, @@ -312,7 +336,6 @@ static void audit_remove_parent_watches(struct audit_parent *parent) struct audit_entry *e; mutex_lock(&audit_filter_mutex); - parent->flags |= AUDIT_PARENT_INVALID; list_for_each_entry_safe(w, nextw, &parent->watches, wlist) { list_for_each_entry_safe(r, nextr, &w->rules, rlist) { e = container_of(r, struct audit_entry, rule); @@ -325,20 +348,8 @@ static void audit_remove_parent_watches(struct audit_parent *parent) audit_remove_watch(w); } mutex_unlock(&audit_filter_mutex); -} - -/* Unregister inotify watches for parents on in_list. - * Generates an IN_IGNORED event. */ -void audit_inotify_unregister(struct list_head *in_list) -{ - struct audit_parent *p, *n; - list_for_each_entry_safe(p, n, in_list, ilist) { - list_del(&p->ilist); - inotify_rm_watch(audit_ih, &p->wdata); - /* the unpin matching the pin in audit_do_del_rule() */ - unpin_inotify_watch(&p->wdata); - } + fsnotify_destroy_mark(&parent->mark); } /* Get path information necessary for adding watches. */ @@ -389,7 +400,7 @@ static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) } } -/* Associate the given rule with an existing parent inotify_watch. +/* Associate the given rule with an existing parent. * Caller must hold audit_filter_mutex. */ static void audit_add_to_parent(struct audit_krule *krule, struct audit_parent *parent) @@ -397,6 +408,8 @@ static void audit_add_to_parent(struct audit_krule *krule, struct audit_watch *w, *watch = krule->watch; int watch_found = 0; + BUG_ON(!mutex_is_locked(&audit_filter_mutex)); + list_for_each_entry(w, &parent->watches, wlist) { if (strcmp(watch->path, w->path)) continue; @@ -413,7 +426,7 @@ static void audit_add_to_parent(struct audit_krule *krule, } if (!watch_found) { - get_inotify_watch(&parent->wdata); + audit_get_parent(parent); watch->parent = parent; list_add(&watch->wlist, &parent->watches); @@ -423,13 +436,12 @@ static void audit_add_to_parent(struct audit_krule *krule, /* Find a matching watch entry, or add this one. * Caller must hold audit_filter_mutex. */ -int audit_add_watch(struct audit_krule *krule) +int audit_add_watch(struct audit_krule *krule, struct list_head **list) { struct audit_watch *watch = krule->watch; - struct inotify_watch *i_watch; struct audit_parent *parent; struct nameidata *ndp = NULL, *ndw = NULL; - int ret = 0; + int h, ret = 0; mutex_unlock(&audit_filter_mutex); @@ -441,47 +453,38 @@ int audit_add_watch(struct audit_krule *krule) goto error; } + mutex_lock(&audit_filter_mutex); + /* update watch filter fields */ if (ndw) { watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; watch->ino = ndw->path.dentry->d_inode->i_ino; } - /* The audit_filter_mutex must not be held during inotify calls because - * we hold it during inotify event callback processing. If an existing - * inotify watch is found, inotify_find_watch() grabs a reference before - * returning. - */ - if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode, - &i_watch) < 0) { + /* either find an old parent or attach a new one */ + parent = audit_find_parent(ndp->path.dentry->d_inode); + if (!parent) { parent = audit_init_parent(ndp); if (IS_ERR(parent)) { - /* caller expects mutex locked */ - mutex_lock(&audit_filter_mutex); ret = PTR_ERR(parent); goto error; } - } else - parent = container_of(i_watch, struct audit_parent, wdata); - - mutex_lock(&audit_filter_mutex); + } - /* parent was moved before we took audit_filter_mutex */ - if (parent->flags & AUDIT_PARENT_INVALID) - ret = -ENOENT; - else - audit_add_to_parent(krule, parent); + audit_add_to_parent(krule, parent); - /* match get in audit_init_parent or inotify_find_watch */ - put_inotify_watch(&parent->wdata); + /* match get in audit_find_parent or audit_init_parent */ + audit_put_parent(parent); + h = audit_hash_ino((u32)watch->ino); + *list = &audit_inode_hash[h]; error: audit_put_nd(ndp, ndw); /* NULL args OK */ return ret; } -void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list) +void audit_remove_watch_rule(struct audit_krule *krule) { struct audit_watch *watch = krule->watch; struct audit_parent *parent = watch->parent; @@ -492,53 +495,74 @@ void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list) audit_remove_watch(watch); if (list_empty(&parent->watches)) { - /* Put parent on the inotify un-registration - * list. Grab a reference before releasing - * audit_filter_mutex, to be released in - * audit_inotify_unregister(). - * If filesystem is going away, just leave - * the sucker alone, eviction will take - * care of it. */ - if (pin_inotify_watch(&parent->wdata)) - list_add(&parent->ilist, list); + audit_get_parent(parent); + fsnotify_destroy_mark(&parent->mark); + audit_put_parent(parent); } } } -/* Update watch data in audit rules based on inotify events. */ -static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask, - u32 cookie, const char *dname, struct inode *inode) +static bool audit_watch_should_send_event(struct fsnotify_group *group, struct inode *inode, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + __u32 mask, void *data, int data_type) +{ + return true; +} + +/* Update watch data in audit rules based on fsnotify events. */ +static int audit_watch_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + struct fsnotify_event *event) { + struct inode *inode; + __u32 mask = event->mask; + const char *dname = event->file_name; struct audit_parent *parent; - parent = container_of(i_watch, struct audit_parent, wdata); + parent = container_of(inode_mark, struct audit_parent, mark); - if (mask & (IN_CREATE|IN_MOVED_TO) && inode) - audit_update_watch(parent, dname, inode->i_sb->s_dev, - inode->i_ino, 0); - else if (mask & (IN_DELETE|IN_MOVED_FROM)) + BUG_ON(group != audit_watch_group); + + switch (event->data_type) { + case (FSNOTIFY_EVENT_PATH): + inode = event->path.dentry->d_inode; + break; + case (FSNOTIFY_EVENT_INODE): + inode = event->inode; + break; + default: + BUG(); + inode = NULL; + break; + }; + + if (mask & (FS_CREATE|FS_MOVED_TO) && inode) + audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); + else if (mask & (FS_DELETE|FS_MOVED_FROM)) audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1); - /* inotify automatically removes the watch and sends IN_IGNORED */ - else if (mask & (IN_DELETE_SELF|IN_UNMOUNT)) - audit_remove_parent_watches(parent); - /* inotify does not remove the watch, so remove it manually */ - else if(mask & IN_MOVE_SELF) { + else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) audit_remove_parent_watches(parent); - inotify_remove_watch_locked(audit_ih, i_watch); - } else if (mask & IN_IGNORED) - put_inotify_watch(i_watch); + + return 0; } -static const struct inotify_operations audit_inotify_ops = { - .handle_event = audit_handle_ievent, - .destroy_watch = audit_free_parent, +static const struct fsnotify_ops audit_watch_fsnotify_ops = { + .should_send_event = audit_watch_should_send_event, + .handle_event = audit_watch_handle_event, + .free_group_priv = NULL, + .freeing_mark = NULL, + .free_event_priv = NULL, }; static int __init audit_watch_init(void) { - audit_ih = inotify_init(&audit_inotify_ops); - if (IS_ERR(audit_ih)) - audit_panic("cannot initialize inotify handle"); + audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops); + if (IS_ERR(audit_watch_group)) { + audit_watch_group = NULL; + audit_panic("cannot create audit fsnotify group"); + } return 0; } -subsys_initcall(audit_watch_init); +device_initcall(audit_watch_init); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index ce08041f578..eb7675499fb 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -71,6 +71,7 @@ static inline void audit_free_rule(struct audit_entry *e) { int i; struct audit_krule *erule = &e->rule; + /* some rules don't have associated watches */ if (erule->watch) audit_put_watch(erule->watch); @@ -746,8 +747,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df, * rule with the new rule in the filterlist, then free the old rule. * The rlist element is undefined; list manipulations are handled apart from * the initial copy. */ -struct audit_entry *audit_dupe_rule(struct audit_krule *old, - struct audit_watch *watch) +struct audit_entry *audit_dupe_rule(struct audit_krule *old) { u32 fcount = old->field_count; struct audit_entry *entry; @@ -769,8 +769,8 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old, new->prio = old->prio; new->buflen = old->buflen; new->inode_f = old->inode_f; - new->watch = NULL; new->field_count = old->field_count; + /* * note that we are OK with not refcounting here; audit_match_tree() * never dereferences tree and we can't get false positives there @@ -811,9 +811,9 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old, } } - if (watch) { - audit_get_watch(watch); - new->watch = watch; + if (old->watch) { + audit_get_watch(old->watch); + new->watch = old->watch; } return entry; @@ -866,7 +866,7 @@ static inline int audit_add_rule(struct audit_entry *entry) struct audit_watch *watch = entry->rule.watch; struct audit_tree *tree = entry->rule.tree; struct list_head *list; - int h, err; + int err; #ifdef CONFIG_AUDITSYSCALL int dont_count = 0; @@ -889,15 +889,11 @@ static inline int audit_add_rule(struct audit_entry *entry) if (watch) { /* audit_filter_mutex is dropped and re-taken during this call */ - err = audit_add_watch(&entry->rule); + err = audit_add_watch(&entry->rule, &list); if (err) { mutex_unlock(&audit_filter_mutex); goto error; } - /* entry->rule.watch may have changed during audit_add_watch() */ - watch = entry->rule.watch; - h = audit_hash_ino((u32)audit_watch_inode(watch)); - list = &audit_inode_hash[h]; } if (tree) { err = audit_add_tree_rule(&entry->rule); @@ -949,7 +945,6 @@ static inline int audit_del_rule(struct audit_entry *entry) struct audit_watch *watch = entry->rule.watch; struct audit_tree *tree = entry->rule.tree; struct list_head *list; - LIST_HEAD(inotify_list); int ret = 0; #ifdef CONFIG_AUDITSYSCALL int dont_count = 0; @@ -969,7 +964,7 @@ static inline int audit_del_rule(struct audit_entry *entry) } if (e->rule.watch) - audit_remove_watch_rule(&e->rule, &inotify_list); + audit_remove_watch_rule(&e->rule); if (e->rule.tree) audit_remove_tree_rule(&e->rule); @@ -987,9 +982,6 @@ static inline int audit_del_rule(struct audit_entry *entry) #endif mutex_unlock(&audit_filter_mutex); - if (!list_empty(&inotify_list)) - audit_inotify_unregister(&inotify_list); - out: if (watch) audit_put_watch(watch); /* match initial get */ @@ -1323,30 +1315,23 @@ static int update_lsm_rule(struct audit_krule *r) { struct audit_entry *entry = container_of(r, struct audit_entry, rule); struct audit_entry *nentry; - struct audit_watch *watch; - struct audit_tree *tree; int err = 0; if (!security_audit_rule_known(r)) return 0; - watch = r->watch; - tree = r->tree; - nentry = audit_dupe_rule(r, watch); + nentry = audit_dupe_rule(r); if (IS_ERR(nentry)) { /* save the first error encountered for the * return value */ err = PTR_ERR(nentry); audit_panic("error updating LSM filters"); - if (watch) + if (r->watch) list_del(&r->rlist); list_del_rcu(&entry->list); list_del(&r->list); } else { - if (watch) { - list_add(&nentry->rule.rlist, audit_watch_rules(watch)); - list_del(&r->rlist); - } else if (tree) + if (r->watch || r->tree) list_replace_init(&r->rlist, &nentry->rule.rlist); list_replace_rcu(&entry->list, &nentry->list); list_replace(&r->list, &nentry->rule.list); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3828ad5fb8f..1b31c130d03 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -65,7 +65,6 @@ #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/syscalls.h> -#include <linux/inotify.h> #include <linux/capability.h> #include <linux/fs_struct.h> @@ -549,9 +548,8 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_WATCH: - if (name && audit_watch_inode(rule->watch) != (unsigned long)-1) - result = (name->dev == audit_watch_dev(rule->watch) && - name->ino == audit_watch_inode(rule->watch)); + if (name) + result = audit_watch_compare(rule->watch, name->ino, name->dev); break; case AUDIT_DIR: if (ctx) @@ -1726,7 +1724,7 @@ static inline void handle_one(const struct inode *inode) struct audit_tree_refs *p; struct audit_chunk *chunk; int count; - if (likely(list_empty(&inode->inotify_watches))) + if (likely(hlist_empty(&inode->i_fsnotify_marks))) return; context = current->audit_context; p = context->trees; @@ -1769,7 +1767,7 @@ retry: seq = read_seqbegin(&rename_lock); for(;;) { struct inode *inode = d->d_inode; - if (inode && unlikely(!list_empty(&inode->inotify_watches))) { + if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { @@ -1837,13 +1835,8 @@ void __audit_getname(const char *name) context->names[context->name_count].ino = (unsigned long)-1; context->names[context->name_count].osid = 0; ++context->name_count; - if (!context->pwd.dentry) { - read_lock(¤t->fs->lock); - context->pwd = current->fs->pwd; - path_get(¤t->fs->pwd); - read_unlock(¤t->fs->lock); - } - + if (!context->pwd.dentry) + get_fs_pwd(current->fs, &context->pwd); } /* audit_putname - intercept a putname request diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d83cab06da8..192f88c5b0f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1102,7 +1102,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if (opts->release_agent) return -EINVAL; opts->release_agent = - kstrndup(token + 14, PATH_MAX, GFP_KERNEL); + kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; } else if (!strncmp(token, "name=", 5)) { @@ -1123,7 +1123,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if (opts->name) return -EINVAL; opts->name = kstrndup(name, - MAX_CGROUP_ROOT_NAMELEN, + MAX_CGROUP_ROOT_NAMELEN - 1, GFP_KERNEL); if (!opts->name) return -ENOMEM; diff --git a/kernel/compat.c b/kernel/compat.c index 5adab05a317..e167efce842 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -279,11 +279,6 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { struct rlimit r; - int ret; - mm_segment_t old_fs = get_fs (); - - if (resource >= RLIM_NLIMITS) - return -EINVAL; if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) || __get_user(r.rlim_cur, &rlim->rlim_cur) || @@ -294,10 +289,7 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, r.rlim_cur = RLIM_INFINITY; if (r.rlim_max == COMPAT_RLIM_INFINITY) r.rlim_max = RLIM_INFINITY; - set_fs(KERNEL_DS); - ret = sys_setrlimit(resource, (struct rlimit __user *) &r); - set_fs(old_fs); - return ret; + return do_prlimit(current, resource, &r, NULL); } #ifdef COMPAT_RLIM_OLD_INFINITY @@ -329,16 +321,13 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource, #endif -asmlinkage long compat_sys_getrlimit (unsigned int resource, +asmlinkage long compat_sys_getrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { struct rlimit r; int ret; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_getrlimit(resource, (struct rlimit __user *) &r); - set_fs(old_fs); + ret = do_prlimit(current, resource, NULL, &r); if (!ret) { if (r.rlim_cur > COMPAT_RLIM_INFINITY) r.rlim_cur = COMPAT_RLIM_INFINITY; diff --git a/kernel/cred.c b/kernel/cred.c index 60bc8b1e32e..9a3e22641fe 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -22,10 +22,6 @@ #define kdebug(FMT, ...) \ printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__) #else -static inline __attribute__((format(printf, 1, 2))) -void no_printk(const char *fmt, ...) -{ -} #define kdebug(FMT, ...) \ no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__) #endif diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index dd62f8e714c..0dbeae37422 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -134,23 +134,14 @@ unregister: return 0; } -int -__set_personality(unsigned int personality) +int __set_personality(unsigned int personality) { - struct exec_domain *ep, *oep; - - ep = lookup_exec_domain(personality); - if (ep == current_thread_info()->exec_domain) { - current->personality = personality; - module_put(ep->module); - return 0; - } + struct exec_domain *oep = current_thread_info()->exec_domain; + current_thread_info()->exec_domain = lookup_exec_domain(personality); current->personality = personality; - oep = current_thread_info()->exec_domain; - current_thread_info()->exec_domain = ep; - module_put(oep->module); + return 0; } @@ -192,11 +183,8 @@ SYSCALL_DEFINE1(personality, unsigned int, personality) { unsigned int old = current->personality; - if (personality != 0xffffffff) { + if (personality != 0xffffffff) set_personality(personality); - if (current->personality != personality) - return -EINVAL; - } return old; } diff --git a/kernel/exit.c b/kernel/exit.c index ceffc67b564..671ed56e0a4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -771,9 +771,12 @@ static void forget_original_parent(struct task_struct *father) struct task_struct *p, *n, *reaper; LIST_HEAD(dead_children); - exit_ptrace(father); - write_lock_irq(&tasklist_lock); + /* + * Note that exit_ptrace() and find_new_reaper() might + * drop tasklist_lock and reacquire it. + */ + exit_ptrace(father); reaper = find_new_reaper(father); list_for_each_entry_safe(p, n, &father->children, sibling) { diff --git a/kernel/fork.c b/kernel/fork.c index a82a65cef74..98b450876f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -899,6 +899,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sig->oom_adj = current->signal->oom_adj; + sig->oom_score_adj = current->signal->oom_score_adj; return 0; } diff --git a/kernel/kexec.c b/kernel/kexec.c index 131b1703936..c0613f7d673 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -151,8 +151,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, image->nr_segments = nr_segments; segment_bytes = nr_segments * sizeof(*segments); result = copy_from_user(image->segment, segments, segment_bytes); - if (result) + if (result) { + result = -EFAULT; goto out; + } /* * Verify we have good destination addresses. The caller is @@ -827,7 +829,7 @@ static int kimage_load_normal_segment(struct kimage *image, result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { - result = (result < 0) ? result : -EIO; + result = -EFAULT; goto out; } ubytes -= uchunk; @@ -882,7 +884,7 @@ static int kimage_load_crash_segment(struct kimage *image, kexec_flush_icache_page(page); kunmap(page); if (result) { - result = (result < 0) ? result : -EIO; + result = -EFAULT; goto out; } ubytes -= uchunk; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 35edbe22e9a..4502604ecad 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -1,8 +1,7 @@ /* - * A generic kernel FIFO implementation. + * A generic kernel FIFO implementation * - * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net> - * Copyright (C) 2004 Stelian Pop <stelian@popies.net> + * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,7 +10,7 @@ * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License @@ -24,422 +23,579 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/err.h> -#include <linux/kfifo.h> #include <linux/log2.h> #include <linux/uaccess.h> +#include <linux/kfifo.h> -static void _kfifo_init(struct kfifo *fifo, void *buffer, - unsigned int size) -{ - fifo->buffer = buffer; - fifo->size = size; - - kfifo_reset(fifo); -} - -/** - * kfifo_init - initialize a FIFO using a preallocated buffer - * @fifo: the fifo to assign the buffer - * @buffer: the preallocated buffer to be used. - * @size: the size of the internal buffer, this has to be a power of 2. - * +/* + * internal helper to calculate the unused elements in a fifo */ -void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) +static inline unsigned int kfifo_unused(struct __kfifo *fifo) { - /* size must be a power of 2 */ - BUG_ON(!is_power_of_2(size)); - - _kfifo_init(fifo, buffer, size); + return (fifo->mask + 1) - (fifo->in - fifo->out); } -EXPORT_SYMBOL(kfifo_init); -/** - * kfifo_alloc - allocates a new FIFO internal buffer - * @fifo: the fifo to assign then new buffer - * @size: the size of the buffer to be allocated, this have to be a power of 2. - * @gfp_mask: get_free_pages mask, passed to kmalloc() - * - * This function dynamically allocates a new fifo internal buffer - * - * The size will be rounded-up to a power of 2. - * The buffer will be release with kfifo_free(). - * Return 0 if no error, otherwise the an error code - */ -int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) +int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, + size_t esize, gfp_t gfp_mask) { - unsigned char *buffer; - /* - * round up to the next power of 2, since our 'let the indices + * round down to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ - if (!is_power_of_2(size)) { - BUG_ON(size > 0x80000000); - size = roundup_pow_of_two(size); + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); + + fifo->in = 0; + fifo->out = 0; + fifo->esize = esize; + + if (size < 2) { + fifo->data = NULL; + fifo->mask = 0; + return -EINVAL; } - buffer = kmalloc(size, gfp_mask); - if (!buffer) { - _kfifo_init(fifo, NULL, 0); + fifo->data = kmalloc(size * esize, gfp_mask); + + if (!fifo->data) { + fifo->mask = 0; return -ENOMEM; } - - _kfifo_init(fifo, buffer, size); + fifo->mask = size - 1; return 0; } -EXPORT_SYMBOL(kfifo_alloc); +EXPORT_SYMBOL(__kfifo_alloc); -/** - * kfifo_free - frees the FIFO internal buffer - * @fifo: the fifo to be freed. - */ -void kfifo_free(struct kfifo *fifo) +void __kfifo_free(struct __kfifo *fifo) { - kfree(fifo->buffer); - _kfifo_init(fifo, NULL, 0); + kfree(fifo->data); + fifo->in = 0; + fifo->out = 0; + fifo->esize = 0; + fifo->data = NULL; + fifo->mask = 0; } -EXPORT_SYMBOL(kfifo_free); +EXPORT_SYMBOL(__kfifo_free); -/** - * kfifo_skip - skip output data - * @fifo: the fifo to be used. - * @len: number of bytes to skip - */ -void kfifo_skip(struct kfifo *fifo, unsigned int len) +int __kfifo_init(struct __kfifo *fifo, void *buffer, + unsigned int size, size_t esize) { - if (len < kfifo_len(fifo)) { - __kfifo_add_out(fifo, len); - return; + size /= esize; + + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); + + fifo->in = 0; + fifo->out = 0; + fifo->esize = esize; + fifo->data = buffer; + + if (size < 2) { + fifo->mask = 0; + return -EINVAL; } - kfifo_reset_out(fifo); + fifo->mask = size - 1; + + return 0; } -EXPORT_SYMBOL(kfifo_skip); +EXPORT_SYMBOL(__kfifo_init); -static inline void __kfifo_in_data(struct kfifo *fifo, - const void *from, unsigned int len, unsigned int off) +static void kfifo_copy_in(struct __kfifo *fifo, const void *src, + unsigned int len, unsigned int off) { + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; unsigned int l; + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + memcpy(fifo->data + off, src, l); + memcpy(fifo->data, src + l, len - l); /* - * Ensure that we sample the fifo->out index -before- we - * start putting bytes into the kfifo. + * make sure that the data in the fifo is up to date before + * incrementing the fifo->in index counter */ + smp_wmb(); +} - smp_mb(); - - off = __kfifo_off(fifo, fifo->in + off); +unsigned int __kfifo_in(struct __kfifo *fifo, + const void *buf, unsigned int len) +{ + unsigned int l; - /* first put the data starting from fifo->in to buffer end */ - l = min(len, fifo->size - off); - memcpy(fifo->buffer + off, from, l); + l = kfifo_unused(fifo); + if (len > l) + len = l; - /* then put the rest (if any) at the beginning of the buffer */ - memcpy(fifo->buffer, from + l, len - l); + kfifo_copy_in(fifo, buf, len, fifo->in); + fifo->in += len; + return len; } +EXPORT_SYMBOL(__kfifo_in); -static inline void __kfifo_out_data(struct kfifo *fifo, - void *to, unsigned int len, unsigned int off) +static void kfifo_copy_out(struct __kfifo *fifo, void *dst, + unsigned int len, unsigned int off) { + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; unsigned int l; + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + memcpy(dst, fifo->data + off, l); + memcpy(dst + l, fifo->data, len - l); /* - * Ensure that we sample the fifo->in index -before- we - * start removing bytes from the kfifo. + * make sure that the data is copied before + * incrementing the fifo->out index counter */ + smp_wmb(); +} - smp_rmb(); +unsigned int __kfifo_out_peek(struct __kfifo *fifo, + void *buf, unsigned int len) +{ + unsigned int l; - off = __kfifo_off(fifo, fifo->out + off); + l = fifo->in - fifo->out; + if (len > l) + len = l; - /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - off); - memcpy(to, fifo->buffer + off, l); + kfifo_copy_out(fifo, buf, len, fifo->out); + return len; +} +EXPORT_SYMBOL(__kfifo_out_peek); - /* then get the rest (if any) from the beginning of the buffer */ - memcpy(to + l, fifo->buffer, len - l); +unsigned int __kfifo_out(struct __kfifo *fifo, + void *buf, unsigned int len) +{ + len = __kfifo_out_peek(fifo, buf, len); + fifo->out += len; + return len; } +EXPORT_SYMBOL(__kfifo_out); -static inline int __kfifo_from_user_data(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned int off, - unsigned *lenout) +static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off, + unsigned int *copied) { + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; unsigned int l; - int ret; + unsigned long ret; + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + ret = copy_from_user(fifo->data + off, from, l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret + len - l, esize); + else { + ret = copy_from_user(fifo->data, from + l, len - l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret, esize); + } /* - * Ensure that we sample the fifo->out index -before- we - * start putting bytes into the kfifo. + * make sure that the data in the fifo is up to date before + * incrementing the fifo->in index counter */ + smp_wmb(); + *copied = len - ret; + /* return the number of elements which are not copied */ + return ret; +} - smp_mb(); +int __kfifo_from_user(struct __kfifo *fifo, const void __user *from, + unsigned long len, unsigned int *copied) +{ + unsigned int l; + unsigned long ret; + unsigned int esize = fifo->esize; + int err; - off = __kfifo_off(fifo, fifo->in + off); + if (esize != 1) + len /= esize; - /* first put the data starting from fifo->in to buffer end */ - l = min(len, fifo->size - off); - ret = copy_from_user(fifo->buffer + off, from, l); - if (unlikely(ret)) { - *lenout = ret; - return -EFAULT; - } - *lenout = l; + l = kfifo_unused(fifo); + if (len > l) + len = l; - /* then put the rest (if any) at the beginning of the buffer */ - ret = copy_from_user(fifo->buffer, from + l, len - l); - *lenout += ret ? ret : len - l; - return ret ? -EFAULT : 0; + ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied); + if (unlikely(ret)) { + len -= ret; + err = -EFAULT; + } else + err = 0; + fifo->in += len; + return err; } +EXPORT_SYMBOL(__kfifo_from_user); -static inline int __kfifo_to_user_data(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned int off, unsigned *lenout) +static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, + unsigned int len, unsigned int off, unsigned int *copied) { unsigned int l; - int ret; - + unsigned long ret; + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + ret = copy_to_user(to, fifo->data + off, l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret + len - l, esize); + else { + ret = copy_to_user(to + l, fifo->data, len - l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret, esize); + } /* - * Ensure that we sample the fifo->in index -before- we - * start removing bytes from the kfifo. + * make sure that the data is copied before + * incrementing the fifo->out index counter */ + smp_wmb(); + *copied = len - ret; + /* return the number of elements which are not copied */ + return ret; +} - smp_rmb(); +int __kfifo_to_user(struct __kfifo *fifo, void __user *to, + unsigned long len, unsigned int *copied) +{ + unsigned int l; + unsigned long ret; + unsigned int esize = fifo->esize; + int err; - off = __kfifo_off(fifo, fifo->out + off); + if (esize != 1) + len /= esize; - /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - off); - ret = copy_to_user(to, fifo->buffer + off, l); - *lenout = l; + l = fifo->in - fifo->out; + if (len > l) + len = l; + ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); if (unlikely(ret)) { - *lenout -= ret; - return -EFAULT; - } + len -= ret; + err = -EFAULT; + } else + err = 0; + fifo->out += len; + return err; +} +EXPORT_SYMBOL(__kfifo_to_user); - /* then get the rest (if any) from the beginning of the buffer */ - len -= l; - ret = copy_to_user(to + l, fifo->buffer, len); - if (unlikely(ret)) { - *lenout += len - ret; - return -EFAULT; +static int setup_sgl_buf(struct scatterlist *sgl, void *buf, + int nents, unsigned int len) +{ + int n; + unsigned int l; + unsigned int off; + struct page *page; + + if (!nents) + return 0; + + if (!len) + return 0; + + n = 0; + page = virt_to_page(buf); + off = offset_in_page(buf); + l = 0; + + while (len >= l + PAGE_SIZE - off) { + struct page *npage; + + l += PAGE_SIZE; + buf += PAGE_SIZE; + npage = virt_to_page(buf); + if (page_to_phys(page) != page_to_phys(npage) - l) { + sg_set_page(sgl, page, l - off, off); + sgl = sg_next(sgl); + if (++n == nents || sgl == NULL) + return n; + page = npage; + len -= l - off; + l = off = 0; + } } - *lenout += len; - return 0; + sg_set_page(sgl, page, len, off); + return n + 1; } -unsigned int __kfifo_in_n(struct kfifo *fifo, - const void *from, unsigned int len, unsigned int recsize) +static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, + int nents, unsigned int len, unsigned int off) { - if (kfifo_avail(fifo) < len + recsize) - return len + 1; + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + unsigned int l; + unsigned int n; - __kfifo_in_data(fifo, from, len, recsize); - return 0; + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + n = setup_sgl_buf(sgl, fifo->data + off, nents, l); + n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); + + if (n) + sg_mark_end(sgl + n - 1); + return n; } -EXPORT_SYMBOL(__kfifo_in_n); -/** - * kfifo_in - puts some data into the FIFO - * @fifo: the fifo to be used. - * @from: the data to be added. - * @len: the length of the data to be added. - * - * This function copies at most @len bytes from the @from buffer into - * the FIFO depending on the free space, and returns the number of - * bytes copied. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_in(struct kfifo *fifo, const void *from, - unsigned int len) +unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len) { - len = min(kfifo_avail(fifo), len); + unsigned int l; - __kfifo_in_data(fifo, from, len, 0); - __kfifo_add_in(fifo, len); - return len; + l = kfifo_unused(fifo); + if (len > l) + len = l; + + return setup_sgl(fifo, sgl, nents, len, fifo->in); } -EXPORT_SYMBOL(kfifo_in); +EXPORT_SYMBOL(__kfifo_dma_in_prepare); -unsigned int __kfifo_in_generic(struct kfifo *fifo, - const void *from, unsigned int len, unsigned int recsize) +unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len) { - return __kfifo_in_rec(fifo, from, len, recsize); + unsigned int l; + + l = fifo->in - fifo->out; + if (len > l) + len = l; + + return setup_sgl(fifo, sgl, nents, len, fifo->out); } -EXPORT_SYMBOL(__kfifo_in_generic); +EXPORT_SYMBOL(__kfifo_dma_out_prepare); -unsigned int __kfifo_out_n(struct kfifo *fifo, - void *to, unsigned int len, unsigned int recsize) +unsigned int __kfifo_max_r(unsigned int len, size_t recsize) { - if (kfifo_len(fifo) < len + recsize) - return len; + unsigned int max = (1 << (recsize << 3)) - 1; - __kfifo_out_data(fifo, to, len, recsize); - __kfifo_add_out(fifo, len + recsize); - return 0; + if (len > max) + return max; + return len; } -EXPORT_SYMBOL(__kfifo_out_n); -/** - * kfifo_out - gets some data from the FIFO - * @fifo: the fifo to be used. - * @to: where the data must be copied. - * @len: the size of the destination buffer. - * - * This function copies at most @len bytes from the FIFO into the - * @to buffer and returns the number of copied bytes. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. +#define __KFIFO_PEEK(data, out, mask) \ + ((data)[(out) & (mask)]) +/* + * __kfifo_peek_n internal helper function for determinate the length of + * the next record in the fifo */ -unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len) +static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize) { - len = min(kfifo_len(fifo), len); + unsigned int l; + unsigned int mask = fifo->mask; + unsigned char *data = fifo->data; - __kfifo_out_data(fifo, to, len, 0); - __kfifo_add_out(fifo, len); + l = __KFIFO_PEEK(data, fifo->out, mask); - return len; + if (--recsize) + l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; + + return l; } -EXPORT_SYMBOL(kfifo_out); - -/** - * kfifo_out_peek - copy some data from the FIFO, but do not remove it - * @fifo: the fifo to be used. - * @to: where the data must be copied. - * @len: the size of the destination buffer. - * @offset: offset into the fifo - * - * This function copies at most @len bytes at @offset from the FIFO - * into the @to buffer and returns the number of copied bytes. - * The data is not removed from the FIFO. + +#define __KFIFO_POKE(data, in, mask, val) \ + ( \ + (data)[(in) & (mask)] = (unsigned char)(val) \ + ) + +/* + * __kfifo_poke_n internal helper function for storeing the length of + * the record into the fifo */ -unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, - unsigned offset) +static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) { - len = min(kfifo_len(fifo), len + offset); + unsigned int mask = fifo->mask; + unsigned char *data = fifo->data; - __kfifo_out_data(fifo, to, len, offset); - return len; + __KFIFO_POKE(data, fifo->in, mask, n); + + if (recsize > 1) + __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); } -EXPORT_SYMBOL(kfifo_out_peek); -unsigned int __kfifo_out_generic(struct kfifo *fifo, - void *to, unsigned int len, unsigned int recsize, - unsigned int *total) +unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) { - return __kfifo_out_rec(fifo, to, len, recsize, total); + return __kfifo_peek_n(fifo, recsize); } -EXPORT_SYMBOL(__kfifo_out_generic); +EXPORT_SYMBOL(__kfifo_len_r); -unsigned int __kfifo_from_user_n(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned int recsize) +unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, + unsigned int len, size_t recsize) { - unsigned total; + if (len + recsize > kfifo_unused(fifo)) + return 0; - if (kfifo_avail(fifo) < len + recsize) - return len + 1; + __kfifo_poke_n(fifo, len, recsize); - __kfifo_from_user_data(fifo, from, len, recsize, &total); - return total; + kfifo_copy_in(fifo, buf, len, fifo->in + recsize); + fifo->in += len + recsize; + return len; } -EXPORT_SYMBOL(__kfifo_from_user_n); - -/** - * kfifo_from_user - puts some data from user space into the FIFO - * @fifo: the fifo to be used. - * @from: pointer to the data to be added. - * @len: the length of the data to be added. - * @total: the actual returned data length. - * - * This function copies at most @len bytes from the @from into the - * FIFO depending and returns -EFAULT/0. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned *total) +EXPORT_SYMBOL(__kfifo_in_r); + +static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, + void *buf, unsigned int len, size_t recsize, unsigned int *n) { - int ret; - len = min(kfifo_avail(fifo), len); - ret = __kfifo_from_user_data(fifo, from, len, 0, total); - if (ret) - return ret; - __kfifo_add_in(fifo, len); - return 0; + *n = __kfifo_peek_n(fifo, recsize); + + if (len > *n) + len = *n; + + kfifo_copy_out(fifo, buf, len, fifo->out + recsize); + return len; +} + +unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, + unsigned int len, size_t recsize) +{ + unsigned int n; + + if (fifo->in == fifo->out) + return 0; + + return kfifo_out_copy_r(fifo, buf, len, recsize, &n); } -EXPORT_SYMBOL(kfifo_from_user); +EXPORT_SYMBOL(__kfifo_out_peek_r); -unsigned int __kfifo_from_user_generic(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned int recsize) +unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, + unsigned int len, size_t recsize) { - return __kfifo_from_user_rec(fifo, from, len, recsize); + unsigned int n; + + if (fifo->in == fifo->out) + return 0; + + len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); + fifo->out += n + recsize; + return len; } -EXPORT_SYMBOL(__kfifo_from_user_generic); +EXPORT_SYMBOL(__kfifo_out_r); -unsigned int __kfifo_to_user_n(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned int reclen, - unsigned int recsize) +int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, + unsigned long len, unsigned int *copied, size_t recsize) { - unsigned int ret, total; + unsigned long ret; - if (kfifo_len(fifo) < reclen + recsize) - return len; + len = __kfifo_max_r(len, recsize); - ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total); + if (len + recsize > kfifo_unused(fifo)) { + *copied = 0; + return 0; + } - if (likely(ret == 0)) - __kfifo_add_out(fifo, reclen + recsize); + __kfifo_poke_n(fifo, len, recsize); - return total; + ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); + if (unlikely(ret)) { + *copied = 0; + return -EFAULT; + } + fifo->in += len + recsize; + return 0; } -EXPORT_SYMBOL(__kfifo_to_user_n); - -/** - * kfifo_to_user - gets data from the FIFO and write it to user space - * @fifo: the fifo to be used. - * @to: where the data must be copied. - * @len: the size of the destination buffer. - * @lenout: pointer to output variable with copied data - * - * This function copies at most @len bytes from the FIFO into the - * @to buffer and 0 or -EFAULT. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned *lenout) +EXPORT_SYMBOL(__kfifo_from_user_r); + +int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, + unsigned long len, unsigned int *copied, size_t recsize) { - int ret; - len = min(kfifo_len(fifo), len); - ret = __kfifo_to_user_data(fifo, to, len, 0, lenout); - __kfifo_add_out(fifo, *lenout); - return ret; + unsigned long ret; + unsigned int n; + + if (fifo->in == fifo->out) { + *copied = 0; + return 0; + } + + n = __kfifo_peek_n(fifo, recsize); + if (len > n) + len = n; + + ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); + if (unlikely(ret)) { + *copied = 0; + return -EFAULT; + } + fifo->out += n + recsize; + return 0; } -EXPORT_SYMBOL(kfifo_to_user); +EXPORT_SYMBOL(__kfifo_to_user_r); -unsigned int __kfifo_to_user_generic(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned int recsize, - unsigned int *total) +unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) { - return __kfifo_to_user_rec(fifo, to, len, recsize, total); + if (!nents) + BUG(); + + len = __kfifo_max_r(len, recsize); + + if (len + recsize > kfifo_unused(fifo)) + return 0; + + return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize); } -EXPORT_SYMBOL(__kfifo_to_user_generic); +EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); -unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize) +void __kfifo_dma_in_finish_r(struct __kfifo *fifo, + unsigned int len, size_t recsize) { - if (recsize == 0) - return kfifo_avail(fifo); - - return __kfifo_peek_n(fifo, recsize); + len = __kfifo_max_r(len, recsize); + __kfifo_poke_n(fifo, len, recsize); + fifo->in += len + recsize; } -EXPORT_SYMBOL(__kfifo_peek_generic); +EXPORT_SYMBOL(__kfifo_dma_in_finish_r); -void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize) +unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) { - __kfifo_skip_rec(fifo, recsize); + if (!nents) + BUG(); + + len = __kfifo_max_r(len, recsize); + + if (len + recsize > fifo->in - fifo->out) + return 0; + + return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize); } -EXPORT_SYMBOL(__kfifo_skip_generic); +EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); + +void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int len; + len = __kfifo_peek_n(fifo, recsize); + fifo->out += len + recsize; +} +EXPORT_SYMBOL(__kfifo_dma_out_finish_r); diff --git a/kernel/panic.c b/kernel/panic.c index 3b16cd93fa7..4c13b1a88eb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -24,6 +24,9 @@ #include <linux/nmi.h> #include <linux/dmi.h> +#define PANIC_TIMER_STEP 100 +#define PANIC_BLINK_SPD 18 + int panic_on_oops; static unsigned long tainted_mask; static int pause_on_oops; @@ -36,36 +39,15 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); -/* Returns how long it waited in ms */ -long (*panic_blink)(long time); -EXPORT_SYMBOL(panic_blink); - -static void panic_blink_one_second(void) +static long no_blink(int state) { - static long i = 0, end; - - if (panic_blink) { - end = i + MSEC_PER_SEC; - - while (i < end) { - i += panic_blink(i); - mdelay(1); - i++; - } - } else { - /* - * When running under a hypervisor a small mdelay may get - * rounded up to the hypervisor timeslice. For example, with - * a 1ms in 10ms hypervisor timeslice we might inflate a - * mdelay(1) loop by 10x. - * - * If we have nothing to blink, spin on 1 second calls to - * mdelay to avoid this. - */ - mdelay(MSEC_PER_SEC); - } + return 0; } +/* Returns how long it waited in ms */ +long (*panic_blink)(int state); +EXPORT_SYMBOL(panic_blink); + /** * panic - halt the system * @fmt: The text string to print @@ -78,7 +60,8 @@ NORET_TYPE void panic(const char * fmt, ...) { static char buf[1024]; va_list args; - long i; + long i, i_next = 0; + int state = 0; /* * It's possible to come here directly from a panic-assertion and @@ -117,6 +100,9 @@ NORET_TYPE void panic(const char * fmt, ...) bust_spinlocks(0); + if (!panic_blink) + panic_blink = no_blink; + if (panic_timeout > 0) { /* * Delay timeout seconds before rebooting the machine. @@ -124,9 +110,13 @@ NORET_TYPE void panic(const char * fmt, ...) */ printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout); - for (i = 0; i < panic_timeout; i++) { + for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { touch_nmi_watchdog(); - panic_blink_one_second(); + if (i >= i_next) { + i += panic_blink(state ^= 1); + i_next = i + 3600 / PANIC_BLINK_SPD; + } + mdelay(PANIC_TIMER_STEP); } /* * This will not be a clean reboot, with everything @@ -152,9 +142,13 @@ NORET_TYPE void panic(const char * fmt, ...) } #endif local_irq_enable(); - while (1) { + for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); - panic_blink_one_second(); + if (i >= i_next) { + i += panic_blink(state ^= 1); + i_next = i + 3600 / PANIC_BLINK_SPD; + } + mdelay(PANIC_TIMER_STEP); } } @@ -344,7 +338,7 @@ static int init_oops_id(void) } late_initcall(init_oops_id); -static void print_oops_end_marker(void) +void print_oops_end_marker(void) { init_oops_id(); printk(KERN_WARNING "---[ end trace %016llx ]---\n", diff --git a/kernel/params.c b/kernel/params.c index 0b30ecd53a5..08107d18175 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -31,6 +31,42 @@ #define DEBUGP(fmt, a...) #endif +/* Protects all parameters, and incidentally kmalloced_param list. */ +static DEFINE_MUTEX(param_lock); + +/* This just allows us to keep track of which parameters are kmalloced. */ +struct kmalloced_param { + struct list_head list; + char val[]; +}; +static LIST_HEAD(kmalloced_params); + +static void *kmalloc_parameter(unsigned int size) +{ + struct kmalloced_param *p; + + p = kmalloc(sizeof(*p) + size, GFP_KERNEL); + if (!p) + return NULL; + + list_add(&p->list, &kmalloced_params); + return p->val; +} + +/* Does nothing if parameter wasn't kmalloced above. */ +static void maybe_kfree_parameter(void *param) +{ + struct kmalloced_param *p; + + list_for_each_entry(p, &kmalloced_params, list) { + if (p->val == param) { + list_del(&p->list); + kfree(p); + break; + } + } +} + static inline char dash2underscore(char c) { if (c == '-') @@ -49,18 +85,25 @@ static inline int parameq(const char *input, const char *paramname) static int parse_one(char *param, char *val, - struct kernel_param *params, + const struct kernel_param *params, unsigned num_params, int (*handle_unknown)(char *param, char *val)) { unsigned int i; + int err; /* Find parameter */ for (i = 0; i < num_params; i++) { if (parameq(param, params[i].name)) { + /* Noone handled NULL, so do it here. */ + if (!val && params[i].ops->set != param_set_bool) + return -EINVAL; DEBUGP("They are equal! Calling %p\n", - params[i].set); - return params[i].set(val, ¶ms[i]); + params[i].ops->set); + mutex_lock(¶m_lock); + err = params[i].ops->set(val, ¶ms[i]); + mutex_unlock(¶m_lock); + return err; } } @@ -128,7 +171,7 @@ static char *next_arg(char *args, char **param, char **val) /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ int parse_args(const char *name, char *args, - struct kernel_param *params, + const struct kernel_param *params, unsigned num, int (*unknown)(char *param, char *val)) { @@ -176,22 +219,29 @@ int parse_args(const char *name, /* Lazy bastard, eh? */ #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ - int param_set_##name(const char *val, struct kernel_param *kp) \ + int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ tmptype l; \ int ret; \ \ - if (!val) return -EINVAL; \ ret = strtolfn(val, 0, &l); \ if (ret == -EINVAL || ((type)l != l)) \ return -EINVAL; \ *((type *)kp->arg) = l; \ return 0; \ } \ - int param_get_##name(char *buffer, struct kernel_param *kp) \ + int param_get_##name(char *buffer, const struct kernel_param *kp) \ { \ return sprintf(buffer, format, *((type *)kp->arg)); \ - } + } \ + struct kernel_param_ops param_ops_##name = { \ + .set = param_set_##name, \ + .get = param_get_##name, \ + }; \ + EXPORT_SYMBOL(param_set_##name); \ + EXPORT_SYMBOL(param_get_##name); \ + EXPORT_SYMBOL(param_ops_##name) + STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); @@ -201,39 +251,50 @@ STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); -int param_set_charp(const char *val, struct kernel_param *kp) +int param_set_charp(const char *val, const struct kernel_param *kp) { - if (!val) { - printk(KERN_ERR "%s: string parameter expected\n", - kp->name); - return -EINVAL; - } - if (strlen(val) > 1024) { printk(KERN_ERR "%s: string parameter too long\n", kp->name); return -ENOSPC; } - /* This is a hack. We can't need to strdup in early boot, and we + maybe_kfree_parameter(*(char **)kp->arg); + + /* This is a hack. We can't kmalloc in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - *(char **)kp->arg = kstrdup(val, GFP_KERNEL); + *(char **)kp->arg = kmalloc_parameter(strlen(val)+1); if (!*(char **)kp->arg) return -ENOMEM; + strcpy(*(char **)kp->arg, val); } else *(const char **)kp->arg = val; return 0; } +EXPORT_SYMBOL(param_set_charp); -int param_get_charp(char *buffer, struct kernel_param *kp) +int param_get_charp(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%s", *((char **)kp->arg)); } +EXPORT_SYMBOL(param_get_charp); + +static void param_free_charp(void *arg) +{ + maybe_kfree_parameter(*((char **)arg)); +} + +struct kernel_param_ops param_ops_charp = { + .set = param_set_charp, + .get = param_get_charp, + .free = param_free_charp, +}; +EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ -int param_set_bool(const char *val, struct kernel_param *kp) +int param_set_bool(const char *val, const struct kernel_param *kp) { bool v; @@ -258,8 +319,9 @@ int param_set_bool(const char *val, struct kernel_param *kp) *(int *)kp->arg = v; return 0; } +EXPORT_SYMBOL(param_set_bool); -int param_get_bool(char *buffer, struct kernel_param *kp) +int param_get_bool(char *buffer, const struct kernel_param *kp) { bool val; if (kp->flags & KPARAM_ISBOOL) @@ -270,9 +332,16 @@ int param_get_bool(char *buffer, struct kernel_param *kp) /* Y and N chosen as being relatively non-coder friendly */ return sprintf(buffer, "%c", val ? 'Y' : 'N'); } +EXPORT_SYMBOL(param_get_bool); + +struct kernel_param_ops param_ops_bool = { + .set = param_set_bool, + .get = param_get_bool, +}; +EXPORT_SYMBOL(param_ops_bool); /* This one must be bool. */ -int param_set_invbool(const char *val, struct kernel_param *kp) +int param_set_invbool(const char *val, const struct kernel_param *kp) { int ret; bool boolval; @@ -285,18 +354,26 @@ int param_set_invbool(const char *val, struct kernel_param *kp) *(bool *)kp->arg = !boolval; return ret; } +EXPORT_SYMBOL(param_set_invbool); -int param_get_invbool(char *buffer, struct kernel_param *kp) +int param_get_invbool(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); } +EXPORT_SYMBOL(param_get_invbool); + +struct kernel_param_ops param_ops_invbool = { + .set = param_set_invbool, + .get = param_get_invbool, +}; +EXPORT_SYMBOL(param_ops_invbool); /* We break the rule and mangle the string. */ static int param_array(const char *name, const char *val, unsigned int min, unsigned int max, void *elem, int elemsize, - int (*set)(const char *, struct kernel_param *kp), + int (*set)(const char *, const struct kernel_param *kp), u16 flags, unsigned int *num) { @@ -309,12 +386,6 @@ static int param_array(const char *name, kp.arg = elem; kp.flags = flags; - /* No equals sign? */ - if (!val) { - printk(KERN_ERR "%s: expects arguments\n", name); - return -EINVAL; - } - *num = 0; /* We expect a comma-separated list of values. */ do { @@ -330,6 +401,7 @@ static int param_array(const char *name, /* nul-terminate and parse */ save = val[len]; ((char *)val)[len] = '\0'; + BUG_ON(!mutex_is_locked(¶m_lock)); ret = set(val, &kp); if (ret != 0) @@ -347,17 +419,17 @@ static int param_array(const char *name, return 0; } -int param_array_set(const char *val, struct kernel_param *kp) +static int param_array_set(const char *val, const struct kernel_param *kp) { const struct kparam_array *arr = kp->arr; unsigned int temp_num; return param_array(kp->name, val, 1, arr->max, arr->elem, - arr->elemsize, arr->set, kp->flags, + arr->elemsize, arr->ops->set, kp->flags, arr->num ?: &temp_num); } -int param_array_get(char *buffer, struct kernel_param *kp) +static int param_array_get(char *buffer, const struct kernel_param *kp) { int i, off, ret; const struct kparam_array *arr = kp->arr; @@ -368,7 +440,8 @@ int param_array_get(char *buffer, struct kernel_param *kp) if (i) buffer[off++] = ','; p.arg = arr->elem + arr->elemsize * i; - ret = arr->get(buffer + off, &p); + BUG_ON(!mutex_is_locked(¶m_lock)); + ret = arr->ops->get(buffer + off, &p); if (ret < 0) return ret; off += ret; @@ -377,14 +450,27 @@ int param_array_get(char *buffer, struct kernel_param *kp) return off; } -int param_set_copystring(const char *val, struct kernel_param *kp) +static void param_array_free(void *arg) +{ + unsigned int i; + const struct kparam_array *arr = arg; + + if (arr->ops->free) + for (i = 0; i < (arr->num ? *arr->num : arr->max); i++) + arr->ops->free(arr->elem + arr->elemsize * i); +} + +struct kernel_param_ops param_array_ops = { + .set = param_array_set, + .get = param_array_get, + .free = param_array_free, +}; +EXPORT_SYMBOL(param_array_ops); + +int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - if (!val) { - printk(KERN_ERR "%s: missing param set value\n", kp->name); - return -EINVAL; - } if (strlen(val)+1 > kps->maxlen) { printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); @@ -393,12 +479,20 @@ int param_set_copystring(const char *val, struct kernel_param *kp) strcpy(kps->string, val); return 0; } +EXPORT_SYMBOL(param_set_copystring); -int param_get_string(char *buffer, struct kernel_param *kp) +int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; return strlcpy(buffer, kps->string, kps->maxlen); } +EXPORT_SYMBOL(param_get_string); + +struct kernel_param_ops param_ops_string = { + .set = param_set_copystring, + .get = param_get_string, +}; +EXPORT_SYMBOL(param_ops_string); /* sysfs output in /sys/modules/XYZ/parameters/ */ #define to_module_attr(n) container_of(n, struct module_attribute, attr) @@ -409,7 +503,7 @@ extern struct kernel_param __start___param[], __stop___param[]; struct param_attribute { struct module_attribute mattr; - struct kernel_param *param; + const struct kernel_param *param; }; struct module_param_attrs @@ -428,10 +522,12 @@ static ssize_t param_attr_show(struct module_attribute *mattr, int count; struct param_attribute *attribute = to_param_attr(mattr); - if (!attribute->param->get) + if (!attribute->param->ops->get) return -EPERM; - count = attribute->param->get(buf, attribute->param); + mutex_lock(¶m_lock); + count = attribute->param->ops->get(buf, attribute->param); + mutex_unlock(¶m_lock); if (count > 0) { strcat(buf, "\n"); ++count; @@ -447,10 +543,12 @@ static ssize_t param_attr_store(struct module_attribute *mattr, int err; struct param_attribute *attribute = to_param_attr(mattr); - if (!attribute->param->set) + if (!attribute->param->ops->set) return -EPERM; - err = attribute->param->set(buf, attribute->param); + mutex_lock(¶m_lock); + err = attribute->param->ops->set(buf, attribute->param); + mutex_unlock(¶m_lock); if (!err) return len; return err; @@ -464,6 +562,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr, #endif #ifdef CONFIG_SYSFS +void __kernel_param_lock(void) +{ + mutex_lock(¶m_lock); +} +EXPORT_SYMBOL(__kernel_param_lock); + +void __kernel_param_unlock(void) +{ + mutex_unlock(¶m_lock); +} +EXPORT_SYMBOL(__kernel_param_unlock); + /* * add_sysfs_param - add a parameter to sysfs * @mk: struct module_kobject @@ -475,7 +585,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, * if there's an error. */ static __modinit int add_sysfs_param(struct module_kobject *mk, - struct kernel_param *kp, + const struct kernel_param *kp, const char *name) { struct module_param_attrs *new; @@ -557,7 +667,7 @@ static void free_module_param_attrs(struct module_kobject *mk) * /sys/module/[mod->name]/parameters/ */ int module_param_sysfs_setup(struct module *mod, - struct kernel_param *kparam, + const struct kernel_param *kparam, unsigned int num_params) { int i, err; @@ -602,7 +712,11 @@ void module_param_sysfs_remove(struct module *mod) void destroy_params(const struct kernel_param *params, unsigned num) { - /* FIXME: This should free kmalloced charp parameters. It doesn't. */ + unsigned int i; + + for (i = 0; i < num; i++) + if (params[i].ops->free) + params[i].ops->free(params[i].arg); } static void __init kernel_add_sysfs_param(const char *name, @@ -768,28 +882,3 @@ static int __init param_sysfs_init(void) subsys_initcall(param_sysfs_init); #endif /* CONFIG_SYSFS */ - -EXPORT_SYMBOL(param_set_byte); -EXPORT_SYMBOL(param_get_byte); -EXPORT_SYMBOL(param_set_short); -EXPORT_SYMBOL(param_get_short); -EXPORT_SYMBOL(param_set_ushort); -EXPORT_SYMBOL(param_get_ushort); -EXPORT_SYMBOL(param_set_int); -EXPORT_SYMBOL(param_get_int); -EXPORT_SYMBOL(param_set_uint); -EXPORT_SYMBOL(param_get_uint); -EXPORT_SYMBOL(param_set_long); -EXPORT_SYMBOL(param_get_long); -EXPORT_SYMBOL(param_set_ulong); -EXPORT_SYMBOL(param_get_ulong); -EXPORT_SYMBOL(param_set_charp); -EXPORT_SYMBOL(param_get_charp); -EXPORT_SYMBOL(param_set_bool); -EXPORT_SYMBOL(param_get_bool); -EXPORT_SYMBOL(param_set_invbool); -EXPORT_SYMBOL(param_get_invbool); -EXPORT_SYMBOL(param_array_set); -EXPORT_SYMBOL(param_array_get); -EXPORT_SYMBOL(param_set_copystring); -EXPORT_SYMBOL(param_get_string); diff --git a/kernel/pid.c b/kernel/pid.c index e9fd8c132d2..d55c6fb8d08 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -122,6 +122,43 @@ static void free_pidmap(struct upid *upid) atomic_inc(&map->nr_free); } +/* + * If we started walking pids at 'base', is 'a' seen before 'b'? + */ +static int pid_before(int base, int a, int b) +{ + /* + * This is the same as saying + * + * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT + * and that mapping orders 'a' and 'b' with respect to 'base'. + */ + return (unsigned)(a - base) < (unsigned)(b - base); +} + +/* + * We might be racing with someone else trying to set pid_ns->last_pid. + * We want the winner to have the "later" value, because if the + * "earlier" value prevails, then a pid may get reused immediately. + * + * Since pids rollover, it is not sufficient to just pick the bigger + * value. We have to consider where we started counting from. + * + * 'base' is the value of pid_ns->last_pid that we observed when + * we started looking for a pid. + * + * 'pid' is the pid that we eventually found. + */ +static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid) +{ + int prev; + int last_write = base; + do { + prev = last_write; + last_write = cmpxchg(&pid_ns->last_pid, prev, pid); + } while ((prev != last_write) && (pid_before(base, last_write, pid))); +} + static int alloc_pidmap(struct pid_namespace *pid_ns) { int i, offset, max_scan, pid, last = pid_ns->last_pid; @@ -132,7 +169,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) pid = RESERVED_PIDS; offset = pid & BITS_PER_PAGE_MASK; map = &pid_ns->pidmap[pid/BITS_PER_PAGE]; - max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; + /* + * If last_pid points into the middle of the map->page we + * want to scan this bitmap block twice, the second time + * we start with offset == 0 (or RESERVED_PIDS). + */ + max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset; for (i = 0; i <= max_scan; ++i) { if (unlikely(!map->page)) { void *page = kzalloc(PAGE_SIZE, GFP_KERNEL); @@ -154,20 +196,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) do { if (!test_and_set_bit(offset, map->page)) { atomic_dec(&map->nr_free); - pid_ns->last_pid = pid; + set_last_pid(pid_ns, last, pid); return pid; } offset = find_next_offset(map, offset); pid = mk_pid(pid_ns, map, offset); - /* - * find_next_offset() found a bit, the pid from it - * is in-bounds, and if we fell back to the last - * bitmap block and the final block was the same - * as the starting point, pid is before last_pid. - */ - } while (offset < BITS_PER_PAGE && pid < pid_max && - (i != max_scan || pid < last || - !((last+1) & BITS_PER_PAGE_MASK))); + } while (offset < BITS_PER_PAGE && pid < pid_max); } if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { ++map; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index f66bdd33a6c..6842eeba587 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -16,13 +16,13 @@ * siglock protection since other code may update expiration cache as * well. */ -void update_rlimit_cpu(unsigned long rlim_new) +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { cputime_t cputime = secs_to_cputime(rlim_new); - spin_lock_irq(¤t->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(¤t->sighand->siglock); + spin_lock_irq(&task->sighand->siglock); + set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(&task->sighand->siglock); } static int check_clock(const clockid_t which_clock) diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c index 97024fd40cd..83bbc7c02df 100644 --- a/kernel/power/block_io.c +++ b/kernel/power/block_io.c @@ -28,7 +28,7 @@ static int submit(int rw, struct block_device *bdev, sector_t sector, struct page *page, struct bio **bio_chain) { - const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8dc31e02ae1..c77963938bc 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,6 +338,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f6cd6faf84f..5e7edfb05e6 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,6 +1086,7 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; + hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e6a5bdf61a3..5d0059eed3e 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_page_of_type(swap)); + offset = swp_offset(get_swap_for_hibernation(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); kfree(ext); } diff --git a/kernel/printk.c b/kernel/printk.c index 4ab0164bcf8..8fe465ac008 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1549,9 +1549,9 @@ void kmsg_dump(enum kmsg_dump_reason reason) chars = logged_chars; spin_unlock_irqrestore(&logbuf_lock, flags); - if (logged_chars > end) { - s1 = log_buf + log_buf_len - logged_chars + end; - l1 = logged_chars - end; + if (chars > end) { + s1 = log_buf + log_buf_len - chars + end; + l1 = chars - end; s2 = log_buf; l2 = end; @@ -1559,8 +1559,8 @@ void kmsg_dump(enum kmsg_dump_reason reason) s1 = ""; l1 = 0; - s2 = log_buf + end - logged_chars; - l2 = logged_chars; + s2 = log_buf + end - chars; + l2 = chars; } if (!spin_trylock_irqsave(&dump_list_lock, flags)) { diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 74a3d693c19..f34d798ef4a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -324,26 +324,32 @@ int ptrace_detach(struct task_struct *child, unsigned int data) } /* - * Detach all tasks we were using ptrace on. + * Detach all tasks we were using ptrace on. Called with tasklist held + * for writing, and returns with it held too. But note it can release + * and reacquire the lock. */ void exit_ptrace(struct task_struct *tracer) { struct task_struct *p, *n; LIST_HEAD(ptrace_dead); - write_lock_irq(&tasklist_lock); + if (likely(list_empty(&tracer->ptraced))) + return; + list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { if (__ptrace_detach(tracer, p)) list_add(&p->ptrace_entry, &ptrace_dead); } - write_unlock_irq(&tasklist_lock); + write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&tracer->ptraced)); list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { list_del_init(&p->ptrace_entry); release_task(p); } + + write_lock_irq(&tasklist_lock); } int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) diff --git a/kernel/range.c b/kernel/range.c index 74e2e611492..471b66acabb 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -7,10 +7,6 @@ #include <linux/range.h> -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) { if (start >= end) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 70f8d90331e..4372ccb2512 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -35,9 +35,9 @@ struct cpu_stop_done { /* the actual stopper, one per every possible cpu, enabled on online cpus */ struct cpu_stopper { spinlock_t lock; + bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ struct task_struct *thread; /* stopper thread */ - bool enabled; /* is this stopper enabled? */ }; static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); diff --git a/kernel/sys.c b/kernel/sys.c index e83ddbbaf89..e9ad4448982 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1236,15 +1236,14 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { - if (resource >= RLIM_NLIMITS) - return -EINVAL; - else { - struct rlimit value; - task_lock(current->group_leader); - value = current->signal->rlim[resource]; - task_unlock(current->group_leader); - return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; - } + struct rlimit value; + int ret; + + ret = do_prlimit(current, resource, NULL, &value); + if (!ret) + ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; + + return ret; } #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT @@ -1272,44 +1271,89 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, #endif -SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +static inline bool rlim64_is_infinity(__u64 rlim64) { - struct rlimit new_rlim, *old_rlim; - int retval; +#if BITS_PER_LONG < 64 + return rlim64 >= ULONG_MAX; +#else + return rlim64 == RLIM64_INFINITY; +#endif +} + +static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) +{ + if (rlim->rlim_cur == RLIM_INFINITY) + rlim64->rlim_cur = RLIM64_INFINITY; + else + rlim64->rlim_cur = rlim->rlim_cur; + if (rlim->rlim_max == RLIM_INFINITY) + rlim64->rlim_max = RLIM64_INFINITY; + else + rlim64->rlim_max = rlim->rlim_max; +} + +static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) +{ + if (rlim64_is_infinity(rlim64->rlim_cur)) + rlim->rlim_cur = RLIM_INFINITY; + else + rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; + if (rlim64_is_infinity(rlim64->rlim_max)) + rlim->rlim_max = RLIM_INFINITY; + else + rlim->rlim_max = (unsigned long)rlim64->rlim_max; +} + +/* make sure you are allowed to change @tsk limits before calling this */ +int do_prlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim, struct rlimit *old_rlim) +{ + struct rlimit *rlim; + int retval = 0; if (resource >= RLIM_NLIMITS) return -EINVAL; - if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) - return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; - old_rlim = current->signal->rlim + resource; - if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) - return -EPERM; - if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) - return -EPERM; - - retval = security_task_setrlimit(resource, &new_rlim); - if (retval) - return retval; - - if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) { - /* - * The caller is asking for an immediate RLIMIT_CPU - * expiry. But we use the zero value to mean "it was - * never set". So let's cheat and make it one second - * instead - */ - new_rlim.rlim_cur = 1; + if (new_rlim) { + if (new_rlim->rlim_cur > new_rlim->rlim_max) + return -EINVAL; + if (resource == RLIMIT_NOFILE && + new_rlim->rlim_max > sysctl_nr_open) + return -EPERM; } - task_lock(current->group_leader); - *old_rlim = new_rlim; - task_unlock(current->group_leader); - - if (resource != RLIMIT_CPU) + /* protect tsk->signal and tsk->sighand from disappearing */ + read_lock(&tasklist_lock); + if (!tsk->sighand) { + retval = -ESRCH; goto out; + } + + rlim = tsk->signal->rlim + resource; + task_lock(tsk->group_leader); + if (new_rlim) { + if (new_rlim->rlim_max > rlim->rlim_max && + !capable(CAP_SYS_RESOURCE)) + retval = -EPERM; + if (!retval) + retval = security_task_setrlimit(tsk->group_leader, + resource, new_rlim); + if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { + /* + * The caller is asking for an immediate RLIMIT_CPU + * expiry. But we use the zero value to mean "it was + * never set". So let's cheat and make it one second + * instead + */ + new_rlim->rlim_cur = 1; + } + } + if (!retval) { + if (old_rlim) + *old_rlim = *rlim; + if (new_rlim) + *rlim = *new_rlim; + } + task_unlock(tsk->group_leader); /* * RLIMIT_CPU handling. Note that the kernel fails to return an error @@ -1317,14 +1361,84 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) * very long-standing error, and fixing it now risks breakage of * applications, so we live with it */ - if (new_rlim.rlim_cur == RLIM_INFINITY) - goto out; - - update_rlimit_cpu(new_rlim.rlim_cur); + if (!retval && new_rlim && resource == RLIMIT_CPU && + new_rlim->rlim_cur != RLIM_INFINITY) + update_rlimit_cpu(tsk, new_rlim->rlim_cur); out: + read_unlock(&tasklist_lock); + return retval; +} + +/* rcu lock must be held */ +static int check_prlimit_permission(struct task_struct *task) +{ + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_RESOURCE)) { + return -EPERM; + } + return 0; } +SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, + const struct rlimit64 __user *, new_rlim, + struct rlimit64 __user *, old_rlim) +{ + struct rlimit64 old64, new64; + struct rlimit old, new; + struct task_struct *tsk; + int ret; + + if (new_rlim) { + if (copy_from_user(&new64, new_rlim, sizeof(new64))) + return -EFAULT; + rlim64_to_rlim(&new64, &new); + } + + rcu_read_lock(); + tsk = pid ? find_task_by_vpid(pid) : current; + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + ret = check_prlimit_permission(tsk); + if (ret) { + rcu_read_unlock(); + return ret; + } + get_task_struct(tsk); + rcu_read_unlock(); + + ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, + old_rlim ? &old : NULL); + + if (!ret && old_rlim) { + rlim_to_rlim64(&old, &old64); + if (copy_to_user(old_rlim, &old64, sizeof(old64))) + ret = -EFAULT; + } + + put_task_struct(tsk); + return ret; +} + +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +{ + struct rlimit new_rlim; + + if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; + return do_prlimit(current, resource, &new_rlim, NULL); +} + /* * It would make sense to put struct rusage in the task_struct, * except that would make the task_struct be *really big*. After diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 70f2ea758ff..bad369ec540 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -181,3 +181,7 @@ cond_syscall(sys_eventfd2); /* performance counters: */ cond_syscall(sys_perf_event_open); + +/* fanotify! */ +cond_syscall(sys_fanotify_init); +cond_syscall(sys_fanotify_mark); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6d850bf0a51..ca38e8e3e90 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -44,6 +44,7 @@ #include <linux/times.h> #include <linux/limits.h> #include <linux/dcache.h> +#include <linux/dnotify.h> #include <linux/syscalls.h> #include <linux/vmstat.h> #include <linux/nfs_fs.h> @@ -53,6 +54,7 @@ #include <linux/perf_event.h> #include <linux/kprobes.h> #include <linux/pipe_fs_i.h> +#include <linux/oom.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -85,9 +87,6 @@ /* External variables not in a header file. */ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; -extern int sysctl_panic_on_oom; -extern int sysctl_oom_kill_allocating_task; -extern int sysctl_oom_dump_tasks; extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; @@ -133,6 +132,9 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; +#ifdef CONFIG_INOTIFY_USER +#include <linux/inotify.h> +#endif #ifdef CONFIG_SPARC #include <asm/system.h> #endif @@ -209,9 +211,6 @@ static struct ctl_table fs_table[]; static struct ctl_table debug_table[]; static struct ctl_table dev_table[]; extern struct ctl_table random_table[]; -#ifdef CONFIG_INOTIFY_USER -extern struct ctl_table inotify_table[]; -#endif #ifdef CONFIG_EPOLL extern struct ctl_table epoll_table[]; #endif diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e14c839e9fa..49010d822f7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -690,6 +690,7 @@ static void timekeeping_adjust(s64 offset) static cycle_t logarithmic_accumulation(cycle_t offset, int shift) { u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; + u64 raw_nsecs; /* If the offset is smaller then a shifted interval, do nothing */ if (offset < timekeeper.cycle_interval<<shift) @@ -706,12 +707,15 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) second_overflow(); } - /* Accumulate into raw time */ - raw_time.tv_nsec += timekeeper.raw_interval << shift;; - while (raw_time.tv_nsec >= NSEC_PER_SEC) { - raw_time.tv_nsec -= NSEC_PER_SEC; - raw_time.tv_sec++; + /* Accumulate raw time */ + raw_nsecs = timekeeper.raw_interval << shift; + raw_nsecs += raw_time.tv_nsec; + if (raw_nsecs >= NSEC_PER_SEC) { + u64 raw_secs = raw_nsecs; + raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); + raw_time.tv_sec += raw_secs; } + raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ timekeeper.ntp_error += tick_length << shift; diff --git a/kernel/timer.c b/kernel/timer.c index f1b8afe1ad8..97bf05baade 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -326,6 +326,7 @@ EXPORT_SYMBOL_GPL(round_jiffies_up_relative); /** * set_timer_slack - set the allowed slack for a timer + * @timer: the timer to be modified * @slack_hz: the amount of time (in jiffies) allowed for rounding * * Set the amount of time, in jiffies, that a certain timer has diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 638711c1750..959f8d6c8cc 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -169,9 +169,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; +#define BLK_TC_HARDBARRIER BLK_TC_BARRIER +#define BLK_TC_RAHEAD BLK_TC_AHEAD + /* The ilog2() calls fall out because they're constant */ -#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ - (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) +#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* * The worker for the various blk_add_trace*() types. Fills out a @@ -194,9 +197,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; what |= ddir_act[rw & WRITE]; - what |= MASK_TC_BIT(rw, BARRIER); - what |= MASK_TC_BIT(rw, SYNCIO); - what |= MASK_TC_BIT(rw, AHEAD); + what |= MASK_TC_BIT(rw, HARDBARRIER); + what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); @@ -549,6 +552,41 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } EXPORT_SYMBOL_GPL(blk_trace_setup); +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) +static int compat_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + char __user *arg) +{ + struct blk_user_trace_setup buts; + struct compat_blk_user_trace_setup cbuts; + int ret; + + if (copy_from_user(&cbuts, arg, sizeof(cbuts))) + return -EFAULT; + + buts = (struct blk_user_trace_setup) { + .act_mask = cbuts.act_mask, + .buf_size = cbuts.buf_size, + .buf_nr = cbuts.buf_nr, + .start_lba = cbuts.start_lba, + .end_lba = cbuts.end_lba, + .pid = cbuts.pid, + }; + memcpy(&buts.name, &cbuts.name, 32); + + ret = do_blk_trace_setup(q, name, dev, bdev, &buts); + if (ret) + return ret; + + if (copy_to_user(arg, &buts.name, 32)) { + blk_trace_remove(q); + return -EFAULT; + } + + return 0; +} +#endif + int blk_trace_startstop(struct request_queue *q, int start) { int ret; @@ -601,6 +639,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO; + lock_kernel(); mutex_lock(&bdev->bd_mutex); switch (cmd) { @@ -608,6 +647,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) bdevname(bdev, b); ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) + case BLKTRACESETUP32: + bdevname(bdev, b); + ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); + break; +#endif case BLKTRACESTART: start = 1; case BLKTRACESTOP: @@ -622,6 +667,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) } mutex_unlock(&bdev->bd_mutex); + unlock_kernel(); return ret; } @@ -661,10 +707,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (likely(!bt)) return; - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); + if (rq->cmd_flags & REQ_DISCARD) + rw |= REQ_DISCARD; + + if (rq->cmd_flags & REQ_SECURE) + rw |= REQ_SECURE; - if (blk_pc_request(rq)) { + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, what, rq->errors, rq->cmd_len, rq->cmd); @@ -925,7 +974,7 @@ void blk_add_driver_data(struct request_queue *q, if (likely(!bt)) return; - if (blk_pc_request(rq)) + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, rq->errors, len, data); else @@ -1730,7 +1779,7 @@ void blk_dump_cmd(char *buf, struct request *rq) int len = rq->cmd_len; unsigned char *cmd = rq->cmd; - if (!blk_pc_request(rq)) { + if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { buf[0] = '\0'; return; } @@ -1755,21 +1804,23 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) if (rw & WRITE) rwbs[i++] = 'W'; - else if (rw & 1 << BIO_RW_DISCARD) + else if (rw & REQ_DISCARD) rwbs[i++] = 'D'; else if (bytes) rwbs[i++] = 'R'; else rwbs[i++] = 'N'; - if (rw & 1 << BIO_RW_AHEAD) + if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; - if (rw & 1 << BIO_RW_BARRIER) + if (rw & REQ_HARDBARRIER) rwbs[i++] = 'B'; - if (rw & 1 << BIO_RW_SYNCIO) + if (rw & REQ_SYNC) rwbs[i++] = 'S'; - if (rw & 1 << BIO_RW_META) + if (rw & REQ_META) rwbs[i++] = 'M'; + if (rw & REQ_SECURE) + rwbs[i++] = 'E'; rwbs[i] = '\0'; } @@ -1779,8 +1830,11 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq) int rw = rq->cmd_flags & 0x03; int bytes; - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); + if (rq->cmd_flags & REQ_DISCARD) + rw |= REQ_DISCARD; + + if (rq->cmd_flags & REQ_SECURE) + rw |= REQ_SECURE; bytes = blk_rq_bytes(rq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9ca34cddaf6..2994a0e3a61 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2568,7 +2568,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on); int schedule_on_each_cpu(work_func_t func) { int cpu; - struct work_struct *works; + struct work_struct __percpu *works; works = alloc_percpu(struct work_struct); if (!works) @@ -3527,7 +3527,7 @@ static int __init init_workqueues(void) unsigned int cpu; int i; - hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); + cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { |