diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 27 | ||||
-rw-r--r-- | kernel/audit.h | 6 | ||||
-rw-r--r-- | kernel/auditfilter.c | 33 | ||||
-rw-r--r-- | kernel/auditsc.c | 133 | ||||
-rw-r--r-- | kernel/futex.c | 32 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.c | 19 | ||||
-rw-r--r-- | kernel/relay.c | 4 | ||||
-rw-r--r-- | kernel/seccomp.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 55 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 20 | ||||
-rw-r--r-- | kernel/tracepoint.c | 516 | ||||
-rw-r--r-- | kernel/user_namespace.c | 11 |
17 files changed, 486 insertions, 432 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 95a20f3f52f..7c2893602d0 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -182,7 +182,7 @@ struct audit_buffer { struct audit_reply { __u32 portid; - struct net *net; + struct net *net; struct sk_buff *skb; }; @@ -396,7 +396,7 @@ static void audit_printk_skb(struct sk_buff *skb) if (printk_ratelimit()) pr_notice("type=%d %s\n", nlh->nlmsg_type, data); else - audit_log_lost("printk limit exceeded\n"); + audit_log_lost("printk limit exceeded"); } audit_hold_skb(skb); @@ -412,7 +412,7 @@ static void kauditd_send_skb(struct sk_buff *skb) BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ if (audit_pid) { pr_err("*NO* daemon at audit_pid=%d\n", audit_pid); - audit_log_lost("auditd disappeared\n"); + audit_log_lost("auditd disappeared"); audit_pid = 0; audit_sock = NULL; } @@ -607,7 +607,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) { int err = 0; - /* Only support the initial namespaces for now. */ + /* Only support initial user namespace for now. */ /* * We return ECONNREFUSED because it tricks userspace into thinking * that audit was not configured into the kernel. Lots of users @@ -618,8 +618,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) * userspace will reject all logins. This should be removed when we * support non init namespaces!! */ - if ((current_user_ns() != &init_user_ns) || - (task_active_pid_ns(current) != &init_pid_ns)) + if (current_user_ns() != &init_user_ns) return -ECONNREFUSED; switch (msg_type) { @@ -639,6 +638,11 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: + /* Only support auditd and auditctl in initial pid namespace + * for now. */ + if ((task_active_pid_ns(current) != &init_pid_ns)) + return -EPERM; + if (!capable(CAP_AUDIT_CONTROL)) err = -EPERM; break; @@ -659,6 +663,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) { int rc = 0; uid_t uid = from_kuid(&init_user_ns, current_uid()); + pid_t pid = task_tgid_nr(current); if (!audit_enabled && msg_type != AUDIT_USER_AVC) { *ab = NULL; @@ -668,7 +673,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (unlikely(!*ab)) return rc; - audit_log_format(*ab, "pid=%d uid=%u", task_tgid_vnr(current), uid); + audit_log_format(*ab, "pid=%d uid=%u", pid, uid); audit_log_session_info(*ab); audit_log_task_context(*ab); @@ -1097,7 +1102,7 @@ static void __net_exit audit_net_exit(struct net *net) audit_sock = NULL; } - rcu_assign_pointer(aunet->nlsk, NULL); + RCU_INIT_POINTER(aunet->nlsk, NULL); synchronize_net(); netlink_kernel_release(sock); } @@ -1829,11 +1834,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) spin_unlock_irq(&tsk->sighand->siglock); audit_log_format(ab, - " ppid=%ld pid=%d auid=%u uid=%u gid=%u" + " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", - sys_getppid(), - tsk->pid, + task_ppid_nr(tsk), + task_pid_nr(tsk), from_kuid(&init_user_ns, audit_get_loginuid(tsk)), from_kuid(&init_user_ns, cred->uid), from_kgid(&init_user_ns, cred->gid), diff --git a/kernel/audit.h b/kernel/audit.h index 8df13221460..7bb65730c89 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -106,6 +106,11 @@ struct audit_names { bool should_free; }; +struct audit_proctitle { + int len; /* length of the cmdline field. */ + char *value; /* the cmdline field */ +}; + /* The per-task audit context. */ struct audit_context { int dummy; /* must be the first element */ @@ -202,6 +207,7 @@ struct audit_context { } execve; }; int fds[2]; + struct audit_proctitle proctitle; #if AUDIT_DEBUG int put_count; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 92062fd6cc8..8e9bc9c3dbb 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -19,6 +19,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/audit.h> #include <linux/kthread.h> @@ -226,7 +228,7 @@ static int audit_match_signal(struct audit_entry *entry) #endif /* Common user-space to kernel rule translation. */ -static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) +static inline struct audit_entry *audit_to_entry_common(struct audit_rule_data *rule) { unsigned listnr; struct audit_entry *entry; @@ -249,7 +251,7 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) ; } if (unlikely(rule->action == AUDIT_POSSIBLE)) { - printk(KERN_ERR "AUDIT_POSSIBLE is deprecated\n"); + pr_err("AUDIT_POSSIBLE is deprecated\n"); goto exit_err; } if (rule->action != AUDIT_NEVER && rule->action != AUDIT_ALWAYS) @@ -403,7 +405,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, int i; char *str; - entry = audit_to_entry_common((struct audit_rule *)data); + entry = audit_to_entry_common(data); if (IS_ERR(entry)) goto exit_nofree; @@ -431,6 +433,19 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->val = 0; } + if ((f->type == AUDIT_PID) || (f->type == AUDIT_PPID)) { + struct pid *pid; + rcu_read_lock(); + pid = find_vpid(f->val); + if (!pid) { + rcu_read_unlock(); + err = -ESRCH; + goto exit_free; + } + f->val = pid_nr(pid); + rcu_read_unlock(); + } + err = audit_field_valid(entry, f); if (err) goto exit_free; @@ -479,8 +494,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { - printk(KERN_WARNING "audit rule for LSM " - "\'%s\' is invalid\n", str); + pr_warn("audit rule for LSM \'%s\' is invalid\n", + str); err = 0; } if (err) { @@ -709,8 +724,8 @@ static inline int audit_dupe_lsm_field(struct audit_field *df, /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { - printk(KERN_WARNING "audit rule for LSM \'%s\' is " - "invalid\n", df->lsm_str); + pr_warn("audit rule for LSM \'%s\' is invalid\n", + df->lsm_str); ret = 0; } @@ -1240,12 +1255,14 @@ static int audit_filter_user_rules(struct audit_krule *rule, int type, for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; + pid_t pid; int result = 0; u32 sid; switch (f->type) { case AUDIT_PID: - result = audit_comparator(task_pid_vnr(current), f->op, f->val); + pid = task_pid_nr(current); + result = audit_comparator(pid, f->op, f->val); break; case AUDIT_UID: result = audit_uid_comparator(current_uid(), f->op, f->uid); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7aef2f4b6c6..f251a5e8d17 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -42,6 +42,8 @@ * and <dustin.kirkland@us.ibm.com> for LSPP certification compliance. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <asm/types.h> #include <linux/atomic.h> @@ -68,6 +70,7 @@ #include <linux/capability.h> #include <linux/fs_struct.h> #include <linux/compat.h> +#include <linux/ctype.h> #include "audit.h" @@ -79,6 +82,9 @@ /* no execve audit message should be longer than this (userspace limits) */ #define MAX_EXECVE_AUDIT_LEN 7500 +/* max length to print of cmdline/proctitle value during audit */ +#define MAX_PROCTITLE_AUDIT_LEN 128 + /* number of audit rules */ int audit_n_rules; @@ -451,15 +457,17 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_field *f = &rule->fields[i]; struct audit_names *n; int result = 0; + pid_t pid; switch (f->type) { case AUDIT_PID: - result = audit_comparator(tsk->pid, f->op, f->val); + pid = task_pid_nr(tsk); + result = audit_comparator(pid, f->op, f->val); break; case AUDIT_PPID: if (ctx) { if (!ctx->ppid) - ctx->ppid = sys_getppid(); + ctx->ppid = task_ppid_nr(tsk); result = audit_comparator(ctx->ppid, f->op, f->val); } break; @@ -805,7 +813,8 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) rcu_read_unlock(); } -static inline struct audit_context *audit_get_context(struct task_struct *tsk, +/* Transfer the audit context pointer to the caller, clearing it in the tsk's struct */ +static inline struct audit_context *audit_take_context(struct task_struct *tsk, int return_valid, long return_code) { @@ -842,6 +851,13 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, return context; } +static inline void audit_proctitle_free(struct audit_context *context) +{ + kfree(context->proctitle.value); + context->proctitle.value = NULL; + context->proctitle.len = 0; +} + static inline void audit_free_names(struct audit_context *context) { struct audit_names *n, *next; @@ -850,16 +866,15 @@ static inline void audit_free_names(struct audit_context *context) if (context->put_count + context->ino_count != context->name_count) { int i = 0; - printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d" - " name_count=%d put_count=%d" - " ino_count=%d [NOT freeing]\n", - __FILE__, __LINE__, + pr_err("%s:%d(:%d): major=%d in_syscall=%d" + " name_count=%d put_count=%d ino_count=%d" + " [NOT freeing]\n", __FILE__, __LINE__, context->serial, context->major, context->in_syscall, context->name_count, context->put_count, context->ino_count); list_for_each_entry(n, &context->names_list, list) { - printk(KERN_ERR "names[%d] = %p = %s\n", i++, - n->name, n->name->name ?: "(null)"); + pr_err("names[%d] = %p = %s\n", i++, n->name, + n->name->name ?: "(null)"); } dump_stack(); return; @@ -955,6 +970,7 @@ static inline void audit_free_context(struct audit_context *context) audit_free_aux(context); kfree(context->filterkey); kfree(context->sockaddr); + audit_proctitle_free(context); kfree(context); } @@ -1157,7 +1173,7 @@ static void audit_log_execve_info(struct audit_context *context, */ buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL); if (!buf) { - audit_panic("out of memory for argv string\n"); + audit_panic("out of memory for argv string"); return; } @@ -1271,6 +1287,59 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_end(ab); } +static inline int audit_proctitle_rtrim(char *proctitle, int len) +{ + char *end = proctitle + len - 1; + while (end > proctitle && !isprint(*end)) + end--; + + /* catch the case where proctitle is only 1 non-print character */ + len = end - proctitle + 1; + len -= isprint(proctitle[len-1]) == 0; + return len; +} + +static void audit_log_proctitle(struct task_struct *tsk, + struct audit_context *context) +{ + int res; + char *buf; + char *msg = "(null)"; + int len = strlen(msg); + struct audit_buffer *ab; + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE); + if (!ab) + return; /* audit_panic or being filtered */ + + audit_log_format(ab, "proctitle="); + + /* Not cached */ + if (!context->proctitle.value) { + buf = kmalloc(MAX_PROCTITLE_AUDIT_LEN, GFP_KERNEL); + if (!buf) + goto out; + /* Historically called this from procfs naming */ + res = get_cmdline(tsk, buf, MAX_PROCTITLE_AUDIT_LEN); + if (res == 0) { + kfree(buf); + goto out; + } + res = audit_proctitle_rtrim(buf, res); + if (res == 0) { + kfree(buf); + goto out; + } + context->proctitle.value = buf; + context->proctitle.len = res; + } + msg = context->proctitle.value; + len = context->proctitle.len; +out: + audit_log_n_untrustedstring(ab, msg, len); + audit_log_end(ab); +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -1388,6 +1457,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_name(context, n, NULL, i++, &call_panic); } + audit_log_proctitle(tsk, context); + /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); if (ab) @@ -1406,7 +1477,7 @@ void __audit_free(struct task_struct *tsk) { struct audit_context *context; - context = audit_get_context(tsk, 0, 0); + context = audit_take_context(tsk, 0, 0); if (!context) return; @@ -1500,7 +1571,7 @@ void __audit_syscall_exit(int success, long return_code) else success = AUDITSC_FAILURE; - context = audit_get_context(tsk, success, return_code); + context = audit_take_context(tsk, success, return_code); if (!context) return; @@ -1550,7 +1621,7 @@ static inline void handle_one(const struct inode *inode) if (likely(put_tree_ref(context, chunk))) return; if (unlikely(!grow_tree_refs(context))) { - printk(KERN_WARNING "out of memory, audit has lost a tree reference\n"); + pr_warn("out of memory, audit has lost a tree reference\n"); audit_set_auditable(context); audit_put_chunk(chunk); unroll_tree_refs(context, p, count); @@ -1609,8 +1680,7 @@ retry: goto retry; } /* too bad */ - printk(KERN_WARNING - "out of memory, audit has lost a tree reference\n"); + pr_warn("out of memory, audit has lost a tree reference\n"); unroll_tree_refs(context, p, count); audit_set_auditable(context); return; @@ -1682,7 +1752,7 @@ void __audit_getname(struct filename *name) if (!context->in_syscall) { #if AUDIT_DEBUG == 2 - printk(KERN_ERR "%s:%d(:%d): ignoring getname(%p)\n", + pr_err("%s:%d(:%d): ignoring getname(%p)\n", __FILE__, __LINE__, context->serial, name); dump_stack(); #endif @@ -1721,15 +1791,15 @@ void audit_putname(struct filename *name) BUG_ON(!context); if (!name->aname || !context->in_syscall) { #if AUDIT_DEBUG == 2 - printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n", + pr_err("%s:%d(:%d): final_putname(%p)\n", __FILE__, __LINE__, context->serial, name); if (context->name_count) { struct audit_names *n; int i = 0; list_for_each_entry(n, &context->names_list, list) - printk(KERN_ERR "name[%d] = %p = %s\n", i++, - n->name, n->name->name ?: "(null)"); + pr_err("name[%d] = %p = %s\n", i++, n->name, + n->name->name ?: "(null)"); } #endif final_putname(name); @@ -1738,9 +1808,8 @@ void audit_putname(struct filename *name) else { ++context->put_count; if (context->put_count > context->name_count) { - printk(KERN_ERR "%s:%d(:%d): major=%d" - " in_syscall=%d putname(%p) name_count=%d" - " put_count=%d\n", + pr_err("%s:%d(:%d): major=%d in_syscall=%d putname(%p)" + " name_count=%d put_count=%d\n", __FILE__, __LINE__, context->serial, context->major, context->in_syscall, name->name, @@ -1981,12 +2050,10 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; - audit_log_format(ab, "pid=%d uid=%u" - " old-auid=%u new-auid=%u old-ses=%u new-ses=%u" - " res=%d", - current->pid, uid, - oldloginuid, loginuid, oldsessionid, sessionid, - !rc); + audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid); + audit_log_task_context(ab); + audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, oldsessionid, sessionid, !rc); audit_log_end(ab); } @@ -2208,7 +2275,7 @@ void __audit_ptrace(struct task_struct *t) { struct audit_context *context = current->audit_context; - context->target_pid = t->pid; + context->target_pid = task_pid_nr(t); context->target_auid = audit_get_loginuid(t); context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); @@ -2233,7 +2300,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { - audit_sig_pid = tsk->pid; + audit_sig_pid = task_pid_nr(tsk); if (uid_valid(tsk->loginuid)) audit_sig_uid = tsk->loginuid; else @@ -2247,7 +2314,7 @@ int __audit_signal_info(int sig, struct task_struct *t) /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { - ctx->target_pid = t->tgid; + ctx->target_pid = task_tgid_nr(t); ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); @@ -2268,7 +2335,7 @@ int __audit_signal_info(int sig, struct task_struct *t) } BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS); - axp->target_pid[axp->pid_count] = t->tgid; + axp->target_pid[axp->pid_count] = task_tgid_nr(t); axp->target_auid[axp->pid_count] = audit_get_loginuid(t); axp->target_uid[axp->pid_count] = t_uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); @@ -2368,7 +2435,7 @@ static void audit_log_task(struct audit_buffer *ab) from_kgid(&init_user_ns, gid), sessionid); audit_log_task_context(ab); - audit_log_format(ab, " pid=%d comm=", current->pid); + audit_log_format(ab, " pid=%d comm=", task_pid_nr(current)); audit_log_untrustedstring(ab, current->comm); if (mm) { down_read(&mm->mmap_sem); diff --git a/kernel/futex.c b/kernel/futex.c index 6801b3751a9..5f589279e46 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -70,7 +70,10 @@ #include "locking/rtmutex_common.h" /* - * Basic futex operation and ordering guarantees: + * READ this before attempting to hack on futexes! + * + * Basic futex operation and ordering guarantees + * ============================================= * * The waiter reads the futex value in user space and calls * futex_wait(). This function computes the hash bucket and acquires @@ -119,7 +122,7 @@ * sys_futex(WAIT, futex, val); * futex_wait(futex, val); * - * waiters++; + * waiters++; (a) * mb(); (A) <-- paired with -. * | * lock(hash_bucket(futex)); | @@ -135,14 +138,14 @@ * unlock(hash_bucket(futex)); * schedule(); if (waiters) * lock(hash_bucket(futex)); - * wake_waiters(futex); - * unlock(hash_bucket(futex)); + * else wake_waiters(futex); + * waiters--; (b) unlock(hash_bucket(futex)); * - * Where (A) orders the waiters increment and the futex value read -- this - * is guaranteed by the head counter in the hb spinlock; and where (B) - * orders the write to futex and the waiters read -- this is done by the - * barriers in get_futex_key_refs(), through either ihold or atomic_inc, - * depending on the futex type. + * Where (A) orders the waiters increment and the futex value read through + * atomic operations (see hb_waiters_inc) and where (B) orders the write + * to futex and the waiters read -- this is done by the barriers in + * get_futex_key_refs(), through either ihold or atomic_inc, depending on the + * futex type. * * This yields the following case (where X:=waiters, Y:=futex): * @@ -155,6 +158,17 @@ * Which guarantees that x==0 && y==0 is impossible; which translates back into * the guarantee that we cannot both miss the futex variable change and the * enqueue. + * + * Note that a new waiter is accounted for in (a) even when it is possible that + * the wait call can return error, in which case we backtrack from it in (b). + * Refer to the comment in queue_lock(). + * + * Similarly, in order to account for waiters being requeued on another + * address we always increment the waiters for the destination bucket before + * acquiring the lock. It then decrements them again after releasing it - + * the code that actually moves the futex(es) between hash buckets (requeue_futex) + * will do the additional required waiter count housekeeping. This is done for + * double_lock_hb() and double_unlock_hb(), respectively. */ #ifndef CONFIG_HAVE_FUTEX_CMPXCHG diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index e1191c996c5..5cf6731b98e 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { - if (unlikely(!debug_locks)) - return; + if (likely(debug_locks)) { + DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->magic != lock); + if (!lock->owner) + DEBUG_LOCKS_WARN_ON(!lock->owner); + else + DEBUG_LOCKS_WARN_ON(lock->owner != current); - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + mutex_clear_owner(lock); + } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug diff --git a/kernel/relay.c b/kernel/relay.c index 52d6a6f5626..5a56d3c8dc0 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1195,8 +1195,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, static const struct pipe_buf_operations relay_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = relay_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -1253,7 +1251,7 @@ static ssize_t subbuf_splice_actor(struct file *in, subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; pidx = (read_start / PAGE_SIZE) % subbuf_pages; poff = read_start & ~PAGE_MASK; - nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers); + nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max); for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { unsigned int this_len, this_end, private; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index fd609bd9d6d..590c3792508 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) { struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); + unsigned long args[6]; sd->nr = syscall_get_nr(task, regs); - sd->arch = syscall_get_arch(task, regs); - - /* Unroll syscall_get_args to help gcc on arm. */ - syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); - syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); - syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); - syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); - syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); - syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); - + sd->arch = syscall_get_arch(); + syscall_get_arguments(task, regs, 0, 6, args); + sd->args[0] = args[0]; + sd->args[1] = args[1]; + sd->args[2] = args[2]; + sd->args[3] = args[3]; + sd->args[4] = args[4]; + sd->args[5] = args[5]; sd->instruction_pointer = KSTK_EIP(task); } @@ -348,7 +347,7 @@ static void seccomp_send_sigsys(int syscall, int reason) info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; - info.si_arch = syscall_get_arch(current, task_pt_regs(current)); + info.si_arch = syscall_get_arch(); info.si_syscall = syscall; force_sig_info(SIGSYS, &info, current); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9be67c5e5b0..737b0efa1a6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3611,6 +3611,8 @@ static const char readme_msg[] = #ifdef CONFIG_TRACER_SNAPSHOT "\t\t snapshot\n" #endif + "\t\t dump\n" + "\t\t cpudump\n" "\t example: echo do_fault:traceoff > set_ftrace_filter\n" "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" "\t The first one will disable tracing every time do_fault is hit\n" @@ -4390,8 +4392,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, static const struct pipe_buf_operations tracing_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -4486,7 +4486,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. */ - for (i = 0, rem = len; i < pipe->buffers && rem; i++) { + for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { spd.pages[i] = alloc_page(GFP_KERNEL); if (!spd.pages[i]) break; @@ -5279,8 +5279,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -5356,7 +5354,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); - for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { + for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 83a4378dc5e..3ddfd8f62c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -223,24 +223,25 @@ int ftrace_event_reg(struct ftrace_event_call *call, { struct ftrace_event_file *file = data; + WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { case TRACE_REG_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->probe, file); case TRACE_REG_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->probe, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return tracepoint_probe_register(call->name, + return tracepoint_probe_register(call->tp, call->class->perf_probe, call); case TRACE_REG_PERF_UNREGISTER: - tracepoint_probe_unregister(call->name, + tracepoint_probe_unregister(call->tp, call->class->perf_probe, call); return 0; @@ -352,7 +353,7 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not enable event " - "%s\n", call->name); + "%s\n", ftrace_event_name(call)); break; } set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); @@ -481,27 +482,29 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, { struct ftrace_event_file *file; struct ftrace_event_call *call; + const char *name; int ret = -EINVAL; list_for_each_entry(file, &tr->events, list) { call = file->event_call; + name = ftrace_event_name(call); - if (!call->name || !call->class || !call->class->reg) + if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; if (match && - strcmp(match, call->name) != 0 && + strcmp(match, name) != 0 && strcmp(match, call->class->system) != 0) continue; if (sub && strcmp(sub, call->class->system) != 0) continue; - if (event && strcmp(event, call->name) != 0) + if (event && strcmp(event, name) != 0) continue; ftrace_event_enable_disable(file, set); @@ -699,7 +702,7 @@ static int t_show(struct seq_file *m, void *v) if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); - seq_printf(m, "%s\n", call->name); + seq_printf(m, "%s\n", ftrace_event_name(call)); return 0; } @@ -792,7 +795,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!call->name || !call->class || !call->class->reg) + if (!ftrace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) @@ -907,7 +910,7 @@ static int f_show(struct seq_file *m, void *v) switch ((unsigned long)v) { case FORMAT_HEADER: - seq_printf(m, "name: %s\n", call->name); + seq_printf(m, "name: %s\n", ftrace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_printf(m, "format:\n"); return 0; @@ -1527,6 +1530,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) struct trace_array *tr = file->tr; struct list_head *head; struct dentry *d_events; + const char *name; int ret; /* @@ -1540,10 +1544,11 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) } else d_events = parent; - file->dir = debugfs_create_dir(call->name, d_events); + name = ftrace_event_name(call); + file->dir = debugfs_create_dir(name, d_events); if (!file->dir) { pr_warning("Could not create debugfs '%s' directory\n", - call->name); + name); return -1; } @@ -1567,7 +1572,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) ret = call->class->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" - " events/%s\n", call->name); + " events/%s\n", name); return -1; } } @@ -1631,15 +1636,17 @@ static void event_remove(struct ftrace_event_call *call) static int event_init(struct ftrace_event_call *call) { int ret = 0; + const char *name; - if (WARN_ON(!call->name)) + name = ftrace_event_name(call); + if (WARN_ON(!name)) return -EINVAL; if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) pr_warn("Could not initialize trace events/%s\n", - call->name); + name); } return ret; @@ -1885,7 +1892,7 @@ __trace_add_event_dirs(struct trace_array *tr) ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warning("Could not create directory for event %s\n", - call->name); + ftrace_event_name(call)); } } @@ -1894,18 +1901,20 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) { struct ftrace_event_file *file; struct ftrace_event_call *call; + const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; + name = ftrace_event_name(call); - if (!call->name || !call->class || !call->class->reg) + if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; - if (strcmp(event, call->name) == 0 && + if (strcmp(event, name) == 0 && strcmp(system, call->class->system) == 0) return file; } @@ -1973,7 +1982,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, seq_printf(m, "%s:%s:%s", data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, data->file->event_call->class->system, - data->file->event_call->name); + ftrace_event_name(data->file->event_call)); if (data->count == -1) seq_printf(m, ":unlimited\n"); @@ -2193,7 +2202,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warning("Could not create directory for event %s\n", - file->event_call->name); + ftrace_event_name(file->event_call)); } } @@ -2217,7 +2226,7 @@ __trace_early_add_events(struct trace_array *tr) ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warning("Could not create early event %s\n", - call->name); + ftrace_event_name(call)); } } @@ -2549,7 +2558,7 @@ static __init void event_trace_self_tests(void) continue; #endif - pr_info("Testing event %s: ", call->name); + pr_info("Testing event %s: ", ftrace_event_name(call)); /* * If an event is already enabled, someone is using diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 8efbb69b04f..925f537f07d 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1095,7 +1095,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, seq_printf(m, "%s:%s:%s", enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, enable_data->file->event_call->class->system, - enable_data->file->event_call->name); + ftrace_event_name(enable_data->file->event_call)); if (data->count == -1) seq_puts(m, ":unlimited"); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index ee0a5098ac4..d4ddde28a81 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -173,9 +173,11 @@ struct ftrace_event_class __refdata event_class_ftrace_##call = { \ }; \ \ struct ftrace_event_call __used event_##call = { \ - .name = #call, \ - .event.type = etype, \ .class = &event_class_ftrace_##call, \ + { \ + .name = #call, \ + }, \ + .event.type = etype, \ .print_fmt = print, \ .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \ }; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d021d21dd15..903ae28962b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -341,7 +341,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, struct trace_kprobe *tk; list_for_each_entry(tk, &probe_list, list) - if (strcmp(tk->tp.call.name, event) == 0 && + if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 && strcmp(tk->tp.call.class->system, group) == 0) return tk; return NULL; @@ -516,7 +516,8 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&probe_lock); /* Delete old (same name) event if exist */ - old_tk = find_trace_kprobe(tk->tp.call.name, tk->tp.call.class->system); + old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call), + tk->tp.call.class->system); if (old_tk) { ret = unregister_trace_kprobe(old_tk); if (ret < 0) @@ -564,7 +565,8 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, if (ret) pr_warning("Failed to re-register probe %s on" "%s: %d\n", - tk->tp.call.name, mod->name, ret); + ftrace_event_name(&tk->tp.call), + mod->name, ret); } } mutex_unlock(&probe_lock); @@ -818,7 +820,8 @@ static int probes_seq_show(struct seq_file *m, void *v) int i; seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 'r' : 'p'); - seq_printf(m, ":%s/%s", tk->tp.call.class->system, tk->tp.call.name); + seq_printf(m, ":%s/%s", tk->tp.call.class->system, + ftrace_event_name(&tk->tp.call)); if (!tk->symbol) seq_printf(m, " 0x%p", tk->rp.kp.addr); @@ -876,7 +879,8 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct trace_kprobe *tk = v; - seq_printf(m, " %-44s %15lu %15lu\n", tk->tp.call.name, tk->nhit, + seq_printf(m, " %-44s %15lu %15lu\n", + ftrace_event_name(&tk->tp.call), tk->nhit, tk->rp.kp.nmissed); return 0; @@ -1011,7 +1015,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", tp->call.name)) + if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) goto partial; if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -1047,7 +1051,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - if (!trace_seq_printf(s, "%s: (", tp->call.name)) + if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) goto partial; if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) @@ -1286,7 +1290,8 @@ static int register_kprobe_event(struct trace_kprobe *tk) call->data = tk; ret = trace_add_event_call(call); if (ret) { - pr_info("Failed to register kprobe event: %s\n", call->name); + pr_info("Failed to register kprobe event: %s\n", + ftrace_event_name(call)); kfree(call->print_fmt); unregister_ftrace_event(&call->event); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ca0e79e2aba..a436de18aa9 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -431,7 +431,7 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } trace_seq_init(p); - ret = trace_seq_printf(s, "%s: ", event->name); + ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event)); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e4473367e7a..930e51462dc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -294,7 +294,7 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou struct trace_uprobe *tu; list_for_each_entry(tu, &uprobe_list, list) - if (strcmp(tu->tp.call.name, event) == 0 && + if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 && strcmp(tu->tp.call.class->system, group) == 0) return tu; @@ -324,7 +324,8 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&uprobe_lock); /* register as an event */ - old_tu = find_probe_event(tu->tp.call.name, tu->tp.call.class->system); + old_tu = find_probe_event(ftrace_event_name(&tu->tp.call), + tu->tp.call.class->system); if (old_tu) { /* delete old event */ ret = unregister_trace_uprobe(old_tu); @@ -599,7 +600,8 @@ static int probes_seq_show(struct seq_file *m, void *v) char c = is_ret_probe(tu) ? 'r' : 'p'; int i; - seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, tu->tp.call.name); + seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, + ftrace_event_name(&tu->tp.call)); seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); for (i = 0; i < tu->tp.nr_args; i++) @@ -649,7 +651,8 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct trace_uprobe *tu = v; - seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->tp.call.name, tu->nhit); + seq_printf(m, " %s %-44s %15lu\n", tu->filename, + ftrace_event_name(&tu->tp.call), tu->nhit); return 0; } @@ -844,12 +847,14 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e tu = container_of(event, struct trace_uprobe, tp.call.event); if (is_ret_probe(tu)) { - if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->tp.call.name, + if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", + ftrace_event_name(&tu->tp.call), entry->vaddr[1], entry->vaddr[0])) goto partial; data = DATAOF_TRACE_ENTRY(entry, true); } else { - if (!trace_seq_printf(s, "%s: (0x%lx)", tu->tp.call.name, + if (!trace_seq_printf(s, "%s: (0x%lx)", + ftrace_event_name(&tu->tp.call), entry->vaddr[0])) goto partial; data = DATAOF_TRACE_ENTRY(entry, false); @@ -1275,7 +1280,8 @@ static int register_uprobe_event(struct trace_uprobe *tu) ret = trace_add_event_call(call); if (ret) { - pr_info("Failed to register uprobe event: %s\n", call->name); + pr_info("Failed to register uprobe event: %s\n", + ftrace_event_name(call)); kfree(call->print_fmt); unregister_ftrace_event(&call->event); } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index fb0a38a2655..ac5b23cf721 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Mathieu Desnoyers + * Copyright (C) 2008-2014 Mathieu Desnoyers * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,39 +33,27 @@ extern struct tracepoint * const __stop___tracepoints_ptrs[]; /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; +#ifdef CONFIG_MODULES /* - * Tracepoints mutex protects the builtin and module tracepoints and the hash - * table, as well as the local module list. + * Tracepoint module list mutex protects the local module list. */ -static DEFINE_MUTEX(tracepoints_mutex); +static DEFINE_MUTEX(tracepoint_module_list_mutex); -#ifdef CONFIG_MODULES -/* Local list of struct module */ +/* Local list of struct tp_module */ static LIST_HEAD(tracepoint_module_list); #endif /* CONFIG_MODULES */ /* - * Tracepoint hash table, containing the active tracepoints. - * Protected by tracepoints_mutex. + * tracepoints_mutex protects the builtin and module tracepoints. + * tracepoints_mutex nests inside tracepoint_module_list_mutex. */ -#define TRACEPOINT_HASH_BITS 6 -#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) -static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; +static DEFINE_MUTEX(tracepoints_mutex); /* * Note about RCU : * It is used to delay the free of multiple probes array until a quiescent * state is reached. - * Tracepoint entries modifications are protected by the tracepoints_mutex. */ -struct tracepoint_entry { - struct hlist_node hlist; - struct tracepoint_func *funcs; - int refcount; /* Number of times armed. 0 if disarmed. */ - int enabled; /* Tracepoint enabled */ - char name[0]; -}; - struct tp_probes { struct rcu_head rcu; struct tracepoint_func probes[0]; @@ -92,34 +80,33 @@ static inline void release_probes(struct tracepoint_func *old) } } -static void debug_print_probes(struct tracepoint_entry *entry) +static void debug_print_probes(struct tracepoint_func *funcs) { int i; - if (!tracepoint_debug || !entry->funcs) + if (!tracepoint_debug || !funcs) return; - for (i = 0; entry->funcs[i].func; i++) - printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func); + for (i = 0; funcs[i].func; i++) + printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func); } -static struct tracepoint_func * -tracepoint_entry_add_probe(struct tracepoint_entry *entry, - void *probe, void *data) +static struct tracepoint_func *func_add(struct tracepoint_func **funcs, + struct tracepoint_func *tp_func) { int nr_probes = 0; struct tracepoint_func *old, *new; - if (WARN_ON(!probe)) + if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); - debug_print_probes(entry); - old = entry->funcs; + debug_print_probes(*funcs); + old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe && - old[nr_probes].data == data) + if (old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) return ERR_PTR(-EEXIST); } /* + 2 : one for new probe, one for NULL func */ @@ -128,33 +115,30 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, return ERR_PTR(-ENOMEM); if (old) memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - new[nr_probes].func = probe; - new[nr_probes].data = data; + new[nr_probes] = *tp_func; new[nr_probes + 1].func = NULL; - entry->refcount = nr_probes + 1; - entry->funcs = new; - debug_print_probes(entry); + *funcs = new; + debug_print_probes(*funcs); return old; } -static void * -tracepoint_entry_remove_probe(struct tracepoint_entry *entry, - void *probe, void *data) +static void *func_remove(struct tracepoint_func **funcs, + struct tracepoint_func *tp_func) { int nr_probes = 0, nr_del = 0, i; struct tracepoint_func *old, *new; - old = entry->funcs; + old = *funcs; if (!old) return ERR_PTR(-ENOENT); - debug_print_probes(entry); + debug_print_probes(*funcs); /* (N -> M), (N > 1, M >= 0) probes */ - if (probe) { + if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == probe && - old[nr_probes].data == data) + if (old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) nr_del++; } } @@ -165,9 +149,8 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, */ if (nr_probes - nr_del == 0) { /* N -> 0, (N > 1) */ - entry->funcs = NULL; - entry->refcount = 0; - debug_print_probes(entry); + *funcs = NULL; + debug_print_probes(*funcs); return old; } else { int j = 0; @@ -177,91 +160,35 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, if (new == NULL) return ERR_PTR(-ENOMEM); for (i = 0; old[i].func; i++) - if (old[i].func != probe || old[i].data != data) + if (old[i].func != tp_func->func + || old[i].data != tp_func->data) new[j++] = old[i]; new[nr_probes - nr_del].func = NULL; - entry->refcount = nr_probes - nr_del; - entry->funcs = new; + *funcs = new; } - debug_print_probes(entry); + debug_print_probes(*funcs); return old; } /* - * Get tracepoint if the tracepoint is present in the tracepoint hash table. - * Must be called with tracepoints_mutex held. - * Returns NULL if not present. + * Add the probe function to a tracepoint. */ -static struct tracepoint_entry *get_tracepoint(const char *name) +static int tracepoint_add_func(struct tracepoint *tp, + struct tracepoint_func *func) { - struct hlist_head *head; - struct tracepoint_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} + struct tracepoint_func *old, *tp_funcs; -/* - * Add the tracepoint to the tracepoint hash table. Must be called with - * tracepoints_mutex held. - */ -static struct tracepoint_entry *add_tracepoint(const char *name) -{ - struct hlist_head *head; - struct tracepoint_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - - head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "tracepoint %s busy\n", name); - return ERR_PTR(-EEXIST); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - e->funcs = NULL; - e->refcount = 0; - e->enabled = 0; - hlist_add_head(&e->hlist, head); - return e; -} + if (tp->regfunc && !static_key_enabled(&tp->key)) + tp->regfunc(); -/* - * Remove the tracepoint from the tracepoint hash table. Must be called with - * mutex_lock held. - */ -static inline void remove_tracepoint(struct tracepoint_entry *e) -{ - hlist_del(&e->hlist); - kfree(e); -} - -/* - * Sets the probe callback corresponding to one tracepoint. - */ -static void set_tracepoint(struct tracepoint_entry **entry, - struct tracepoint *elem, int active) -{ - WARN_ON(strcmp((*entry)->name, elem->name) != 0); - - if (elem->regfunc && !static_key_enabled(&elem->key) && active) - elem->regfunc(); - else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) - elem->unregfunc(); + tp_funcs = rcu_dereference_protected(tp->funcs, + lockdep_is_held(&tracepoints_mutex)); + old = func_add(&tp_funcs, func); + if (IS_ERR(old)) { + WARN_ON_ONCE(1); + return PTR_ERR(old); + } + release_probes(old); /* * rcu_assign_pointer has a smp_wmb() which makes sure that the new @@ -270,193 +197,90 @@ static void set_tracepoint(struct tracepoint_entry **entry, * include/linux/tracepoints.h. A matching smp_read_barrier_depends() * is used. */ - rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (active && !static_key_enabled(&elem->key)) - static_key_slow_inc(&elem->key); - else if (!active && static_key_enabled(&elem->key)) - static_key_slow_dec(&elem->key); + rcu_assign_pointer(tp->funcs, tp_funcs); + if (!static_key_enabled(&tp->key)) + static_key_slow_inc(&tp->key); + return 0; } /* - * Disable a tracepoint and its probe callback. + * Remove a probe function from a tracepoint. * Note: only waiting an RCU period after setting elem->call to the empty * function insures that the original callback is not used anymore. This insured * by preempt_disable around the call site. */ -static void disable_tracepoint(struct tracepoint *elem) +static int tracepoint_remove_func(struct tracepoint *tp, + struct tracepoint_func *func) { - if (elem->unregfunc && static_key_enabled(&elem->key)) - elem->unregfunc(); - - if (static_key_enabled(&elem->key)) - static_key_slow_dec(&elem->key); - rcu_assign_pointer(elem->funcs, NULL); -} + struct tracepoint_func *old, *tp_funcs; -/** - * tracepoint_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of tracepoints. - * Called with tracepoints_mutex held. - */ -static void tracepoint_update_probe_range(struct tracepoint * const *begin, - struct tracepoint * const *end) -{ - struct tracepoint * const *iter; - struct tracepoint_entry *mark_entry; - - if (!begin) - return; - - for (iter = begin; iter < end; iter++) { - mark_entry = get_tracepoint((*iter)->name); - if (mark_entry) { - set_tracepoint(&mark_entry, *iter, - !!mark_entry->refcount); - mark_entry->enabled = !!mark_entry->refcount; - } else { - disable_tracepoint(*iter); - } + tp_funcs = rcu_dereference_protected(tp->funcs, + lockdep_is_held(&tracepoints_mutex)); + old = func_remove(&tp_funcs, func); + if (IS_ERR(old)) { + WARN_ON_ONCE(1); + return PTR_ERR(old); } -} - -#ifdef CONFIG_MODULES -void module_update_tracepoints(void) -{ - struct tp_module *tp_mod; - - list_for_each_entry(tp_mod, &tracepoint_module_list, list) - tracepoint_update_probe_range(tp_mod->tracepoints_ptrs, - tp_mod->tracepoints_ptrs + tp_mod->num_tracepoints); -} -#else /* CONFIG_MODULES */ -void module_update_tracepoints(void) -{ -} -#endif /* CONFIG_MODULES */ + release_probes(old); + if (!tp_funcs) { + /* Removed last function */ + if (tp->unregfunc && static_key_enabled(&tp->key)) + tp->unregfunc(); -/* - * Update probes, removing the faulty probes. - * Called with tracepoints_mutex held. - */ -static void tracepoint_update_probes(void) -{ - /* Core kernel tracepoints */ - tracepoint_update_probe_range(__start___tracepoints_ptrs, - __stop___tracepoints_ptrs); - /* tracepoints in modules. */ - module_update_tracepoints(); -} - -static struct tracepoint_func * -tracepoint_add_probe(const char *name, void *probe, void *data) -{ - struct tracepoint_entry *entry; - struct tracepoint_func *old; - - entry = get_tracepoint(name); - if (!entry) { - entry = add_tracepoint(name); - if (IS_ERR(entry)) - return (struct tracepoint_func *)entry; + if (static_key_enabled(&tp->key)) + static_key_slow_dec(&tp->key); } - old = tracepoint_entry_add_probe(entry, probe, data); - if (IS_ERR(old) && !entry->refcount) - remove_tracepoint(entry); - return old; + rcu_assign_pointer(tp->funcs, tp_funcs); + return 0; } /** * tracepoint_probe_register - Connect a probe to a tracepoint - * @name: tracepoint name + * @tp: tracepoint * @probe: probe handler - * @data: probe private data - * - * Returns: - * - 0 if the probe was successfully registered, and tracepoint - * callsites are currently loaded for that probe, - * - -ENODEV if the probe was successfully registered, but no tracepoint - * callsite is currently loaded for that probe, - * - other negative error value on error. - * - * When tracepoint_probe_register() returns either 0 or -ENODEV, - * parameters @name, @probe, and @data may be used by the tracepoint - * infrastructure until the probe is unregistered. * - * The probe address must at least be aligned on the architecture pointer size. + * Returns 0 if ok, error value on error. + * Note: if @tp is within a module, the caller is responsible for + * unregistering the probe before the module is gone. This can be + * performed either with a tracepoint module going notifier, or from + * within module exit functions. */ -int tracepoint_probe_register(const char *name, void *probe, void *data) +int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) { - struct tracepoint_func *old; - struct tracepoint_entry *entry; - int ret = 0; + struct tracepoint_func tp_func; + int ret; mutex_lock(&tracepoints_mutex); - old = tracepoint_add_probe(name, probe, data); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); - } - tracepoint_update_probes(); /* may update entry */ - entry = get_tracepoint(name); - /* Make sure the entry was enabled */ - if (!entry || !entry->enabled) - ret = -ENODEV; + tp_func.func = probe; + tp_func.data = data; + ret = tracepoint_add_func(tp, &tp_func); mutex_unlock(&tracepoints_mutex); - release_probes(old); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register); -static struct tracepoint_func * -tracepoint_remove_probe(const char *name, void *probe, void *data) -{ - struct tracepoint_entry *entry; - struct tracepoint_func *old; - - entry = get_tracepoint(name); - if (!entry) - return ERR_PTR(-ENOENT); - old = tracepoint_entry_remove_probe(entry, probe, data); - if (IS_ERR(old)) - return old; - if (!entry->refcount) - remove_tracepoint(entry); - return old; -} - /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint - * @name: tracepoint name + * @tp: tracepoint * @probe: probe function pointer - * @data: probe private data * - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. + * Returns 0 if ok, error value on error. */ -int tracepoint_probe_unregister(const char *name, void *probe, void *data) +int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) { - struct tracepoint_func *old; + struct tracepoint_func tp_func; + int ret; mutex_lock(&tracepoints_mutex); - old = tracepoint_remove_probe(name, probe, data); - if (IS_ERR(old)) { - mutex_unlock(&tracepoints_mutex); - return PTR_ERR(old); - } - tracepoint_update_probes(); /* may update entry */ + tp_func.func = probe; + tp_func.data = data; + ret = tracepoint_remove_func(tp, &tp_func); mutex_unlock(&tracepoints_mutex); - release_probes(old); - return 0; + return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); - #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { @@ -464,6 +288,74 @@ bool trace_module_has_bad_taint(struct module *mod) (1 << TAINT_UNSIGNED_MODULE)); } +static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list); + +/** + * register_tracepoint_notifier - register tracepoint coming/going notifier + * @nb: notifier block + * + * Notifiers registered with this function are called on module + * coming/going with the tracepoint_module_list_mutex held. + * The notifier block callback should expect a "struct tp_module" data + * pointer. + */ +int register_tracepoint_module_notifier(struct notifier_block *nb) +{ + struct tp_module *tp_mod; + int ret; + + mutex_lock(&tracepoint_module_list_mutex); + ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb); + if (ret) + goto end; + list_for_each_entry(tp_mod, &tracepoint_module_list, list) + (void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod); +end: + mutex_unlock(&tracepoint_module_list_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier); + +/** + * unregister_tracepoint_notifier - unregister tracepoint coming/going notifier + * @nb: notifier block + * + * The notifier block callback should expect a "struct tp_module" data + * pointer. + */ +int unregister_tracepoint_module_notifier(struct notifier_block *nb) +{ + struct tp_module *tp_mod; + int ret; + + mutex_lock(&tracepoint_module_list_mutex); + ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb); + if (ret) + goto end; + list_for_each_entry(tp_mod, &tracepoint_module_list, list) + (void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod); +end: + mutex_unlock(&tracepoint_module_list_mutex); + return ret; + +} +EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); + +/* + * Ensure the tracer unregistered the module's probes before the module + * teardown is performed. Prevents leaks of probe and data pointers. + */ +static void tp_module_going_check_quiescent(struct tracepoint * const *begin, + struct tracepoint * const *end) +{ + struct tracepoint * const *iter; + + if (!begin) + return; + for (iter = begin; iter < end; iter++) + WARN_ON_ONCE((*iter)->funcs); +} + static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod; @@ -479,36 +371,41 @@ static int tracepoint_module_coming(struct module *mod) */ if (trace_module_has_bad_taint(mod)) return 0; - mutex_lock(&tracepoints_mutex); + mutex_lock(&tracepoint_module_list_mutex); tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); if (!tp_mod) { ret = -ENOMEM; goto end; } - tp_mod->num_tracepoints = mod->num_tracepoints; - tp_mod->tracepoints_ptrs = mod->tracepoints_ptrs; + tp_mod->mod = mod; list_add_tail(&tp_mod->list, &tracepoint_module_list); - tracepoint_update_probe_range(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); + blocking_notifier_call_chain(&tracepoint_notify_list, + MODULE_STATE_COMING, tp_mod); end: - mutex_unlock(&tracepoints_mutex); + mutex_unlock(&tracepoint_module_list_mutex); return ret; } -static int tracepoint_module_going(struct module *mod) +static void tracepoint_module_going(struct module *mod) { - struct tp_module *pos; + struct tp_module *tp_mod; if (!mod->num_tracepoints) - return 0; + return; - mutex_lock(&tracepoints_mutex); - tracepoint_update_probe_range(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); - list_for_each_entry(pos, &tracepoint_module_list, list) { - if (pos->tracepoints_ptrs == mod->tracepoints_ptrs) { - list_del(&pos->list); - kfree(pos); + mutex_lock(&tracepoint_module_list_mutex); + list_for_each_entry(tp_mod, &tracepoint_module_list, list) { + if (tp_mod->mod == mod) { + blocking_notifier_call_chain(&tracepoint_notify_list, + MODULE_STATE_GOING, tp_mod); + list_del(&tp_mod->list); + kfree(tp_mod); + /* + * Called the going notifier before checking for + * quiescence. + */ + tp_module_going_check_quiescent(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); break; } } @@ -518,12 +415,11 @@ static int tracepoint_module_going(struct module *mod) * flag on "going", in case a module taints the kernel only after being * loaded. */ - mutex_unlock(&tracepoints_mutex); - return 0; + mutex_unlock(&tracepoint_module_list_mutex); } -int tracepoint_module_notify(struct notifier_block *self, - unsigned long val, void *data) +static int tracepoint_module_notify(struct notifier_block *self, + unsigned long val, void *data) { struct module *mod = data; int ret = 0; @@ -535,24 +431,58 @@ int tracepoint_module_notify(struct notifier_block *self, case MODULE_STATE_LIVE: break; case MODULE_STATE_GOING: - ret = tracepoint_module_going(mod); + tracepoint_module_going(mod); + break; + case MODULE_STATE_UNFORMED: break; } return ret; } -struct notifier_block tracepoint_module_nb = { +static struct notifier_block tracepoint_module_nb = { .notifier_call = tracepoint_module_notify, .priority = 0, }; -static int init_tracepoints(void) +static __init int init_tracepoints(void) { - return register_module_notifier(&tracepoint_module_nb); + int ret; + + ret = register_module_notifier(&tracepoint_module_nb); + if (ret) + pr_warning("Failed to register tracepoint module enter notifier\n"); + + return ret; } __initcall(init_tracepoints); #endif /* CONFIG_MODULES */ +static void for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + struct tracepoint * const *iter; + + if (!begin) + return; + for (iter = begin; iter < end; iter++) + fct(*iter, priv); +} + +/** + * for_each_kernel_tracepoint - iteration on all kernel tracepoints + * @fct: callback + * @priv: private data + */ +void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + for_each_tracepoint_range(__start___tracepoints_ptrs, + __stop___tracepoints_ptrs, fct, priv); +} +EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint); + #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d8f6023fd8..bf71b4b2d63 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); |