From 2db270a80b8f2238e536876cfb3987af02684df8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 20:46:45 +0100 Subject: tracing/blktrace: move the tracing file to kernel/trace Impact: cleanup Move blktrace.c to kernel/trace, also move its config entry. Signed-off-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Acked-by: Jens Axboe Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 1538 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1538 insertions(+) create mode 100644 kernel/trace/blktrace.c (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c new file mode 100644 index 00000000000..3b91da06482 --- /dev/null +++ b/kernel/trace/blktrace.c @@ -0,0 +1,1538 @@ +/* + * Copyright (C) 2006 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace_output.h" + +static unsigned int blktrace_seq __read_mostly = 1; + +static struct trace_array *blk_tr; +static int __read_mostly blk_tracer_enabled; + +/* Select an alternative, minimalistic output than the original one */ +#define TRACE_BLK_OPT_CLASSIC 0x1 + +static struct tracer_opt blk_tracer_opts[] = { + /* Default disable the minimalistic output */ + { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, + { } +}; + +static struct tracer_flags blk_tracer_flags = { + .val = 0, + .opts = blk_tracer_opts, +}; + +/* Global reference count of probes */ +static DEFINE_MUTEX(blk_probe_mutex); +static atomic_t blk_probes_ref = ATOMIC_INIT(0); + +static int blk_register_tracepoints(void); +static void blk_unregister_tracepoints(void); + +/* + * Send out a notify message. + */ +static void trace_note(struct blk_trace *bt, pid_t pid, int action, + const void *data, size_t len) +{ + struct blk_io_trace *t; + + if (!bt->rchan) + return; + + t = relay_reserve(bt->rchan, sizeof(*t) + len); + if (t) { + const int cpu = smp_processor_id(); + + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->time = ktime_to_ns(ktime_get()); + t->device = bt->dev; + t->action = action; + t->pid = pid; + t->cpu = cpu; + t->pdu_len = len; + memcpy((void *) t + sizeof(*t), data, len); + } +} + +/* + * Send out a notify for this process, if we haven't done so since a trace + * started + */ +static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +{ + tsk->btrace_seq = blktrace_seq; + trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); +} + +static void trace_note_time(struct blk_trace *bt) +{ + struct timespec now; + unsigned long flags; + u32 words[2]; + + getnstimeofday(&now); + words[0] = now.tv_sec; + words[1] = now.tv_nsec; + + local_irq_save(flags); + trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); + local_irq_restore(flags); +} + +void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) +{ + int n; + va_list args; + unsigned long flags; + char *buf; + + if (blk_tr) { + va_start(args, fmt); + ftrace_vprintk(fmt, args); + va_end(args); + return; + } + + if (!bt->msg_data) + return; + + local_irq_save(flags); + buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); + va_start(args, fmt); + n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); + va_end(args); + + trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(__trace_note_message); + +static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, + pid_t pid) +{ + if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) + return 1; + if (sector < bt->start_lba || sector > bt->end_lba) + return 1; + if (bt->pid && pid != bt->pid) + return 1; + + return 0; +} + +/* + * Data direction bit lookup + */ +static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), + BLK_TC_ACT(BLK_TC_WRITE) }; + +/* The ilog2() calls fall out because they're constant */ +#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) + +/* + * The worker for the various blk_add_trace*() types. Fills out a + * blk_io_trace structure and places it in a per-cpu subbuffer. + */ +static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + int rw, u32 what, int error, int pdu_len, void *pdu_data) +{ + struct task_struct *tsk = current; + struct ring_buffer_event *event = NULL; + struct blk_io_trace *t; + unsigned long flags = 0; + unsigned long *sequence; + pid_t pid; + int cpu, pc = 0; + + if (unlikely(bt->trace_state != Blktrace_running || + !blk_tracer_enabled)) + return; + + what |= ddir_act[rw & WRITE]; + what |= MASK_TC_BIT(rw, BARRIER); + what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, AHEAD); + what |= MASK_TC_BIT(rw, META); + what |= MASK_TC_BIT(rw, DISCARD); + + pid = tsk->pid; + if (unlikely(act_log_check(bt, what, sector, pid))) + return; + cpu = raw_smp_processor_id(); + + if (blk_tr) { + tracing_record_cmdline(current); + + pc = preempt_count(); + event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + sizeof(*t) + pdu_len, + 0, pc); + if (!event) + return; + t = ring_buffer_event_data(event); + goto record_it; + } + + /* + * A word about the locking here - we disable interrupts to reserve + * some space in the relay per-cpu buffer, to prevent an irq + * from coming in and stepping on our toes. + */ + local_irq_save(flags); + + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(bt, tsk); + + t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); + if (t) { + sequence = per_cpu_ptr(bt->sequence, cpu); + + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->sequence = ++(*sequence); + t->time = ktime_to_ns(ktime_get()); +record_it: + /* + * These two are not needed in ftrace as they are in the + * generic trace_entry, filled by tracing_generic_entry_update, + * but for the trace_event->bin() synthesizer benefit we do it + * here too. + */ + t->cpu = cpu; + t->pid = pid; + + t->sector = sector; + t->bytes = bytes; + t->action = what; + t->device = bt->dev; + t->error = error; + t->pdu_len = pdu_len; + + if (pdu_len) + memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); + + if (blk_tr) { + trace_buffer_unlock_commit(blk_tr, event, 0, pc); + return; + } + } + + local_irq_restore(flags); +} + +static struct dentry *blk_tree_root; +static DEFINE_MUTEX(blk_tree_mutex); + +static void blk_trace_cleanup(struct blk_trace *bt) +{ + debugfs_remove(bt->msg_file); + debugfs_remove(bt->dropped_file); + relay_close(bt->rchan); + free_percpu(bt->sequence); + free_percpu(bt->msg_data); + kfree(bt); + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +int blk_trace_remove(struct request_queue *q) +{ + struct blk_trace *bt; + + bt = xchg(&q->blk_trace, NULL); + if (!bt) + return -EINVAL; + + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) + blk_trace_cleanup(bt); + + return 0; +} +EXPORT_SYMBOL_GPL(blk_trace_remove); + +static int blk_dropped_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + + return 0; +} + +static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct blk_trace *bt = filp->private_data; + char buf[16]; + + snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); + + return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); +} + +static const struct file_operations blk_dropped_fops = { + .owner = THIS_MODULE, + .open = blk_dropped_open, + .read = blk_dropped_read, +}; + +static int blk_msg_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + + return 0; +} + +static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *msg; + struct blk_trace *bt; + + if (count > BLK_TN_MAX_MSG) + return -EINVAL; + + msg = kmalloc(count, GFP_KERNEL); + if (msg == NULL) + return -ENOMEM; + + if (copy_from_user(msg, buffer, count)) { + kfree(msg); + return -EFAULT; + } + + bt = filp->private_data; + __trace_note_message(bt, "%s", msg); + kfree(msg); + + return count; +} + +static const struct file_operations blk_msg_fops = { + .owner = THIS_MODULE, + .open = blk_msg_open, + .write = blk_msg_write, +}; + +/* + * Keep track of how many times we encountered a full subbuffer, to aid + * the user space app in telling how many lost events there were. + */ +static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + struct blk_trace *bt; + + if (!relay_buf_full(buf)) + return 1; + + bt = buf->chan->private_data; + atomic_inc(&bt->dropped); + return 0; +} + +static int blk_remove_buf_file_callback(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + debugfs_remove(dentry); + + /* + * this will fail for all but the last file, but that is ok. what we + * care about is the top level buts->name directory going away, when + * the last trace file is gone. Then we don't have to rmdir() that + * manually on trace stop, so it nicely solves the issue with + * force killing of running traces. + */ + + debugfs_remove(parent); + return 0; +} + +static struct dentry *blk_create_buf_file_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +static struct rchan_callbacks blk_relay_callbacks = { + .subbuf_start = blk_subbuf_start_callback, + .create_buf_file = blk_create_buf_file_callback, + .remove_buf_file = blk_remove_buf_file_callback, +}; + +/* + * Setup everything required to start tracing + */ +int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct blk_user_trace_setup *buts) +{ + struct blk_trace *old_bt, *bt = NULL; + struct dentry *dir = NULL; + int ret, i; + + if (!buts->buf_size || !buts->buf_nr) + return -EINVAL; + + strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); + buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; + + /* + * some device names have larger paths - convert the slashes + * to underscores for this to work as expected + */ + for (i = 0; i < strlen(buts->name); i++) + if (buts->name[i] == '/') + buts->name[i] = '_'; + + ret = -ENOMEM; + bt = kzalloc(sizeof(*bt), GFP_KERNEL); + if (!bt) + goto err; + + bt->sequence = alloc_percpu(unsigned long); + if (!bt->sequence) + goto err; + + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG); + if (!bt->msg_data) + goto err; + + ret = -ENOENT; + + if (!blk_tree_root) { + blk_tree_root = debugfs_create_dir("block", NULL); + if (!blk_tree_root) + return -ENOMEM; + } + + dir = debugfs_create_dir(buts->name, blk_tree_root); + + if (!dir) + goto err; + + bt->dir = dir; + bt->dev = dev; + atomic_set(&bt->dropped, 0); + + ret = -EIO; + bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, + &blk_dropped_fops); + if (!bt->dropped_file) + goto err; + + bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); + if (!bt->msg_file) + goto err; + + bt->rchan = relay_open("trace", dir, buts->buf_size, + buts->buf_nr, &blk_relay_callbacks, bt); + if (!bt->rchan) + goto err; + + bt->act_mask = buts->act_mask; + if (!bt->act_mask) + bt->act_mask = (u16) -1; + + bt->start_lba = buts->start_lba; + bt->end_lba = buts->end_lba; + if (!bt->end_lba) + bt->end_lba = -1ULL; + + bt->pid = buts->pid; + bt->trace_state = Blktrace_setup; + + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) { + ret = blk_register_tracepoints(); + if (ret) + goto probe_err; + } + mutex_unlock(&blk_probe_mutex); + + ret = -EBUSY; + old_bt = xchg(&q->blk_trace, bt); + if (old_bt) { + (void) xchg(&q->blk_trace, old_bt); + goto err; + } + + return 0; +probe_err: + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); +err: + if (bt) { + if (bt->msg_file) + debugfs_remove(bt->msg_file); + if (bt->dropped_file) + debugfs_remove(bt->dropped_file); + free_percpu(bt->sequence); + free_percpu(bt->msg_data); + if (bt->rchan) + relay_close(bt->rchan); + kfree(bt); + } + return ret; +} + +int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + char __user *arg) +{ + struct blk_user_trace_setup buts; + int ret; + + ret = copy_from_user(&buts, arg, sizeof(buts)); + if (ret) + return -EFAULT; + + ret = do_blk_trace_setup(q, name, dev, &buts); + if (ret) + return ret; + + if (copy_to_user(arg, &buts, sizeof(buts))) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(blk_trace_setup); + +int blk_trace_startstop(struct request_queue *q, int start) +{ + int ret; + struct blk_trace *bt = q->blk_trace; + + if (bt == NULL) + return -EINVAL; + + /* + * For starting a trace, we can transition from a setup or stopped + * trace. For stopping a trace, the state must be running + */ + ret = -EINVAL; + if (start) { + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) { + blktrace_seq++; + smp_mb(); + bt->trace_state = Blktrace_running; + + trace_note_time(bt); + ret = 0; + } + } else { + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + relay_flush(bt->rchan); + ret = 0; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(blk_trace_startstop); + +/** + * blk_trace_ioctl: - handle the ioctls associated with tracing + * @bdev: the block device + * @cmd: the ioctl cmd + * @arg: the argument data, if any + * + **/ +int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) +{ + struct request_queue *q; + int ret, start = 0; + char b[BDEVNAME_SIZE]; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + mutex_lock(&bdev->bd_mutex); + + switch (cmd) { + case BLKTRACESETUP: + bdevname(bdev, b); + ret = blk_trace_setup(q, b, bdev->bd_dev, arg); + break; + case BLKTRACESTART: + start = 1; + case BLKTRACESTOP: + ret = blk_trace_startstop(q, start); + break; + case BLKTRACETEARDOWN: + ret = blk_trace_remove(q); + break; + default: + ret = -ENOTTY; + break; + } + + mutex_unlock(&bdev->bd_mutex); + return ret; +} + +/** + * blk_trace_shutdown: - stop and cleanup trace structures + * @q: the request queue associated with the device + * + **/ +void blk_trace_shutdown(struct request_queue *q) +{ + if (q->blk_trace) { + blk_trace_startstop(q, 0); + blk_trace_remove(q); + } +} + +/* + * blktrace probes + */ + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->cmd_flags & 0x03; + + if (likely(!bt)) + return; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, + sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + rw, what, rq->errors, 0, NULL); + } +} + +static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ABORT); +} + +static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_INSERT); +} + +static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +} + +static void blk_add_trace_rq_requeue(struct request_queue *q, + struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +} + +static void blk_add_trace_rq_complete(struct request_queue *q, + struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, + !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); +} + +static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); +} + +static void blk_add_trace_bio_backmerge(struct request_queue *q, + struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); +} + +static void blk_add_trace_bio_frontmerge(struct request_queue *q, + struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); +} + +static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); +} + +static void blk_add_trace_getrq(struct request_queue *q, + struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_GETRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); + } +} + + +static void blk_add_trace_sleeprq(struct request_queue *q, + struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, + 0, 0, NULL); + } +} + +static void blk_add_trace_plug(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); +} + +static void blk_add_trace_unplug_io(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_unplug_timer(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_split(struct request_queue *q, struct bio *bio, + unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), + sizeof(rpdu), &rpdu); + } +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, + !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} +EXPORT_SYMBOL_GPL(blk_add_driver_data); + +static int blk_register_tracepoints(void) +{ + int ret; + + ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); + WARN_ON(ret); + ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); + WARN_ON(ret); + ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); + WARN_ON(ret); + ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); + WARN_ON(ret); + ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); + WARN_ON(ret); + ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); + WARN_ON(ret); + ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); + WARN_ON(ret); + ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + WARN_ON(ret); + ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + WARN_ON(ret); + ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); + WARN_ON(ret); + ret = register_trace_block_getrq(blk_add_trace_getrq); + WARN_ON(ret); + ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); + WARN_ON(ret); + ret = register_trace_block_plug(blk_add_trace_plug); + WARN_ON(ret); + ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); + WARN_ON(ret); + ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); + WARN_ON(ret); + ret = register_trace_block_split(blk_add_trace_split); + WARN_ON(ret); + ret = register_trace_block_remap(blk_add_trace_remap); + WARN_ON(ret); + return 0; +} + +static void blk_unregister_tracepoints(void) +{ + unregister_trace_block_remap(blk_add_trace_remap); + unregister_trace_block_split(blk_add_trace_split); + unregister_trace_block_unplug_io(blk_add_trace_unplug_io); + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); + unregister_trace_block_plug(blk_add_trace_plug); + unregister_trace_block_sleeprq(blk_add_trace_sleeprq); + unregister_trace_block_getrq(blk_add_trace_getrq); + unregister_trace_block_bio_queue(blk_add_trace_bio_queue); + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + unregister_trace_block_bio_complete(blk_add_trace_bio_complete); + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); + unregister_trace_block_rq_complete(blk_add_trace_rq_complete); + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); + unregister_trace_block_rq_issue(blk_add_trace_rq_issue); + unregister_trace_block_rq_insert(blk_add_trace_rq_insert); + unregister_trace_block_rq_abort(blk_add_trace_rq_abort); + + tracepoint_synchronize_unregister(); +} + +/* + * struct blk_io_tracer formatting routines + */ + +static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) +{ + int i = 0; + + if (t->action & BLK_TC_DISCARD) + rwbs[i++] = 'D'; + else if (t->action & BLK_TC_WRITE) + rwbs[i++] = 'W'; + else if (t->bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; + + if (t->action & BLK_TC_AHEAD) + rwbs[i++] = 'A'; + if (t->action & BLK_TC_BARRIER) + rwbs[i++] = 'B'; + if (t->action & BLK_TC_SYNC) + rwbs[i++] = 'S'; + if (t->action & BLK_TC_META) + rwbs[i++] = 'M'; + + rwbs[i] = '\0'; +} + +static inline +const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) +{ + return (const struct blk_io_trace *)ent; +} + +static inline const void *pdu_start(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent) + 1; +} + +static inline u32 t_sec(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->bytes >> 9; +} + +static inline unsigned long long t_sector(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->sector; +} + +static inline __u16 t_error(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->sector; +} + +static __u64 get_pdu_int(const struct trace_entry *ent) +{ + const __u64 *val = pdu_start(ent); + return be64_to_cpu(*val); +} + +static void get_pdu_remap(const struct trace_entry *ent, + struct blk_io_trace_remap *r) +{ + const struct blk_io_trace_remap *__r = pdu_start(ent); + __u64 sector = __r->sector; + + r->device = be32_to_cpu(__r->device); + r->device_from = be32_to_cpu(__r->device_from); + r->sector = be64_to_cpu(sector); +} + +static int blk_log_action_iter(struct trace_iterator *iter, const char *act) +{ + char rwbs[6]; + unsigned long long ts = ns2usecs(iter->ts); + unsigned long usec_rem = do_div(ts, USEC_PER_SEC); + unsigned secs = (unsigned long)ts; + const struct trace_entry *ent = iter->ent; + const struct blk_io_trace *t = (const struct blk_io_trace *)ent; + + fill_rwbs(rwbs, t); + + return trace_seq_printf(&iter->seq, + "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", + MAJOR(t->device), MINOR(t->device), iter->cpu, + secs, usec_rem, ent->pid, act, rwbs); +} + +static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, + const char *act) +{ + char rwbs[6]; + fill_rwbs(rwbs, t); + return trace_seq_printf(s, "%3d,%-3d %2s %3s ", + MAJOR(t->device), MINOR(t->device), act, rwbs); +} + +static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) +{ + const char *cmd = trace_find_cmdline(ent->pid); + + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + t_sector(ent), t_sec(ent), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); +} + +static int blk_log_with_error(struct trace_seq *s, + const struct trace_entry *ent) +{ + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), + t_sec(ent), t_error(ent)); + return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); +} + +static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) +{ + struct blk_io_trace_remap r = { .device = 0, }; + + get_pdu_remap(ent, &r); + return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", + t_sector(ent), + t_sec(ent), MAJOR(r.device), MINOR(r.device), + (unsigned long long)r.sector); +} + +static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid)); +} + +static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid), + get_pdu_int(ent)); +} + +static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), + get_pdu_int(ent), trace_find_cmdline(ent->pid)); +} + +/* + * struct tracer operations + */ + +static void blk_tracer_print_header(struct seq_file *m) +{ + if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + return; + seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" + "# | | | | | |\n"); +} + +static void blk_tracer_start(struct trace_array *tr) +{ + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) + if (blk_register_tracepoints()) + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); + trace_flags &= ~TRACE_ITER_CONTEXT_INFO; +} + +static int blk_tracer_init(struct trace_array *tr) +{ + blk_tr = tr; + blk_tracer_start(tr); + mutex_lock(&blk_probe_mutex); + blk_tracer_enabled++; + mutex_unlock(&blk_probe_mutex); + return 0; +} + +static void blk_tracer_stop(struct trace_array *tr) +{ + trace_flags |= TRACE_ITER_CONTEXT_INFO; + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +static void blk_tracer_reset(struct trace_array *tr) +{ + if (!atomic_read(&blk_probes_ref)) + return; + + mutex_lock(&blk_probe_mutex); + blk_tracer_enabled--; + WARN_ON(blk_tracer_enabled < 0); + mutex_unlock(&blk_probe_mutex); + + blk_tracer_stop(tr); +} + +static struct { + const char *act[2]; + int (*print)(struct trace_seq *s, const struct trace_entry *ent); +} what2act[] __read_mostly = { + [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, + [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, + [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, + [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, + [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, + [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, + [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, + [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, + [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, + [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, + [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, + [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, + [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, + [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, + [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, +}; + +static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, + int flags) +{ + struct trace_seq *s = &iter->seq; + const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; + const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); + int ret; + + if (!trace_print_context(iter)) + return TRACE_TYPE_PARTIAL_LINE; + + if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + ret = trace_seq_printf(s, "Bad pc action %x\n", what); + else { + const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); + if (ret) + ret = what2act[what].print(s, iter->ent); + } + + return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +} + +static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; + const int offset = offsetof(struct blk_io_trace, sector); + struct blk_io_trace old = { + .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, + .time = ns2usecs(iter->ts), + }; + + if (!trace_seq_putmem(s, &old, offset)) + return 0; + return trace_seq_putmem(s, &t->sector, + sizeof(old) - offset + t->pdu_len); +} + +static enum print_line_t +blk_trace_event_print_binary(struct trace_iterator *iter, int flags) +{ + return blk_trace_synthesize_old_trace(iter) ? + TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) +{ + const struct blk_io_trace *t; + u16 what; + int ret; + + if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + return TRACE_TYPE_UNHANDLED; + + t = (const struct blk_io_trace *)iter->ent; + what = t->action & ((1 << BLK_TC_SHIFT) - 1); + + if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); + else { + const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + ret = blk_log_action_iter(iter, what2act[what].act[long_act]); + if (ret) + ret = what2act[what].print(&iter->seq, iter->ent); + } + + return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +} + +static struct tracer blk_tracer __read_mostly = { + .name = "blk", + .init = blk_tracer_init, + .reset = blk_tracer_reset, + .start = blk_tracer_start, + .stop = blk_tracer_stop, + .print_header = blk_tracer_print_header, + .print_line = blk_tracer_print_line, + .flags = &blk_tracer_flags, +}; + +static struct trace_event trace_blk_event = { + .type = TRACE_BLK, + .trace = blk_trace_event_print, + .latency_trace = blk_trace_event_print, + .binary = blk_trace_event_print_binary, +}; + +static int __init init_blk_tracer(void) +{ + if (!register_ftrace_event(&trace_blk_event)) { + pr_warning("Warning: could not register block events\n"); + return 1; + } + + if (register_tracer(&blk_tracer) != 0) { + pr_warning("Warning: could not register the block tracer\n"); + unregister_ftrace_event(&trace_blk_event); + return 1; + } + + return 0; +} + +device_initcall(init_blk_tracer); + +static int blk_trace_remove_queue(struct request_queue *q) +{ + struct blk_trace *bt; + + bt = xchg(&q->blk_trace, NULL); + if (bt == NULL) + return -EINVAL; + + kfree(bt); + return 0; +} + +/* + * Setup everything required to start tracing + */ +static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) +{ + struct blk_trace *old_bt, *bt = NULL; + int ret; + + ret = -ENOMEM; + bt = kzalloc(sizeof(*bt), GFP_KERNEL); + if (!bt) + goto err; + + bt->dev = dev; + bt->act_mask = (u16)-1; + bt->end_lba = -1ULL; + bt->trace_state = Blktrace_running; + + old_bt = xchg(&q->blk_trace, bt); + if (old_bt != NULL) { + (void)xchg(&q->blk_trace, old_bt); + kfree(bt); + ret = -EBUSY; + } + return 0; +err: + return ret; +} + +/* + * sysfs interface to enable and configure tracing + */ + +static ssize_t sysfs_blk_trace_enable_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev; + ssize_t ret = -ENXIO; + + lock_kernel(); + bdev = bdget(part_devt(p)); + if (bdev != NULL) { + struct request_queue *q = bdev_get_queue(bdev); + + if (q != NULL) { + mutex_lock(&bdev->bd_mutex); + ret = sprintf(buf, "%u\n", !!q->blk_trace); + mutex_unlock(&bdev->bd_mutex); + } + + bdput(bdev); + } + + unlock_kernel(); + return ret; +} + +static ssize_t sysfs_blk_trace_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct block_device *bdev; + struct request_queue *q; + struct hd_struct *p; + int value; + ssize_t ret = -ENXIO; + + if (count == 0 || sscanf(buf, "%d", &value) != 1) + goto out; + + lock_kernel(); + p = dev_to_part(dev); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + + mutex_lock(&bdev->bd_mutex); + if (value) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + else + ret = blk_trace_remove_queue(q); + mutex_unlock(&bdev->bd_mutex); + + if (ret == 0) + ret = count; +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); +out: + return ret; +} + +static ssize_t sysfs_blk_trace_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf); +static ssize_t sysfs_blk_trace_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +#define BLK_TRACE_DEVICE_ATTR(_name) \ + DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ + sysfs_blk_trace_attr_show, \ + sysfs_blk_trace_attr_store) + +static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, + sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); +static BLK_TRACE_DEVICE_ATTR(act_mask); +static BLK_TRACE_DEVICE_ATTR(pid); +static BLK_TRACE_DEVICE_ATTR(start_lba); +static BLK_TRACE_DEVICE_ATTR(end_lba); + +static struct attribute *blk_trace_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_act_mask.attr, + &dev_attr_pid.attr, + &dev_attr_start_lba.attr, + &dev_attr_end_lba.attr, + NULL +}; + +struct attribute_group blk_trace_attr_group = { + .name = "trace", + .attrs = blk_trace_attrs, +}; + +static int blk_str2act_mask(const char *str) +{ + int mask = 0; + char *copy = kstrdup(str, GFP_KERNEL), *s; + + if (copy == NULL) + return -ENOMEM; + + s = strstrip(copy); + + while (1) { + char *sep = strchr(s, ','); + + if (sep != NULL) + *sep = '\0'; + + if (strcasecmp(s, "barrier") == 0) + mask |= BLK_TC_BARRIER; + else if (strcasecmp(s, "complete") == 0) + mask |= BLK_TC_COMPLETE; + else if (strcasecmp(s, "fs") == 0) + mask |= BLK_TC_FS; + else if (strcasecmp(s, "issue") == 0) + mask |= BLK_TC_ISSUE; + else if (strcasecmp(s, "pc") == 0) + mask |= BLK_TC_PC; + else if (strcasecmp(s, "queue") == 0) + mask |= BLK_TC_QUEUE; + else if (strcasecmp(s, "read") == 0) + mask |= BLK_TC_READ; + else if (strcasecmp(s, "requeue") == 0) + mask |= BLK_TC_REQUEUE; + else if (strcasecmp(s, "sync") == 0) + mask |= BLK_TC_SYNC; + else if (strcasecmp(s, "write") == 0) + mask |= BLK_TC_WRITE; + + if (sep == NULL) + break; + + s = sep + 1; + } + kfree(copy); + + return mask; +} + +static ssize_t sysfs_blk_trace_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct request_queue *q; + struct block_device *bdev; + ssize_t ret = -ENXIO; + + lock_kernel(); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + mutex_lock(&bdev->bd_mutex); + if (q->blk_trace == NULL) + ret = sprintf(buf, "disabled\n"); + else if (attr == &dev_attr_act_mask) + ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); + else if (attr == &dev_attr_pid) + ret = sprintf(buf, "%u\n", q->blk_trace->pid); + else if (attr == &dev_attr_start_lba) + ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + else if (attr == &dev_attr_end_lba) + ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + mutex_unlock(&bdev->bd_mutex); +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); + return ret; +} + +static ssize_t sysfs_blk_trace_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct block_device *bdev; + struct request_queue *q; + struct hd_struct *p; + u64 value; + ssize_t ret = -ENXIO; + + if (count == 0) + goto out; + + if (attr == &dev_attr_act_mask) { + if (sscanf(buf, "%llx", &value) != 1) { + /* Assume it is a list of trace category names */ + value = blk_str2act_mask(buf); + if (value < 0) + goto out; + } + } else if (sscanf(buf, "%llu", &value) != 1) + goto out; + + lock_kernel(); + p = dev_to_part(dev); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + + mutex_lock(&bdev->bd_mutex); + ret = 0; + if (q->blk_trace == NULL) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + + if (ret == 0) { + if (attr == &dev_attr_act_mask) + q->blk_trace->act_mask = value; + else if (attr == &dev_attr_pid) + q->blk_trace->pid = value; + else if (attr == &dev_attr_start_lba) + q->blk_trace->start_lba = value; + else if (attr == &dev_attr_end_lba) + q->blk_trace->end_lba = value; + ret = count; + } + mutex_unlock(&bdev->bd_mutex); +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); +out: + return ret; +} -- cgit v1.2.3-70-g09d2 From 5fd73f862468280d4cbb5ba4321502f911f9f89a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 21:42:04 -0500 Subject: tracing: remove extra latency_trace method from trace structure Impact: clean up The trace and latency_trace function pointers are identical for every tracer but the function tracer. The differences in the function tracer are trivial (latency output puts paranthesis around parent). This patch removes the latency_trace pointer and all prints will now just use the trace output function pointer. Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 1 - kernel/trace/trace.c | 2 +- kernel/trace/trace_branch.c | 1 - kernel/trace/trace_output.c | 32 -------------------------------- kernel/trace/trace_output.h | 1 - 5 files changed, 1 insertion(+), 36 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e82cb9e930c..e39679a72a3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1231,7 +1231,6 @@ static struct tracer blk_tracer __read_mostly = { static struct trace_event trace_blk_event = { .type = TRACE_BLK, .trace = blk_trace_event_print, - .latency_trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55fcbb56795..21b89ecb848 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1485,7 +1485,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) } if (event) - return event->latency_trace(iter, sym_flags); + return event->trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) goto partial; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index c2e68d440c4..aaa0755268b 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -159,7 +159,6 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, static struct trace_event trace_branch_event = { .type = TRACE_BRANCH, .trace = trace_branch_print, - .latency_trace = trace_branch_print, }; static struct tracer branch_trace __read_mostly = diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 9fc815031b0..306fef84c50 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -437,8 +437,6 @@ int register_ftrace_event(struct trace_event *event) if (event->trace == NULL) event->trace = trace_nop_print; - if (event->latency_trace == NULL) - event->latency_trace = trace_nop_print; if (event->raw == NULL) event->raw = trace_nop_print; if (event->hex == NULL) @@ -480,29 +478,6 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) } /* TRACE_FN */ -static enum print_line_t trace_fn_latency(struct trace_iterator *iter, - int flags) -{ - struct ftrace_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; - if (!trace_seq_puts(s, " (")) - goto partial; - if (!seq_print_ip_sym(s, field->parent_ip, flags)) - goto partial; - if (!trace_seq_puts(s, ")\n")) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) { struct ftrace_entry *field; @@ -573,7 +548,6 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) static struct trace_event trace_fn_event = { .type = TRACE_FN, .trace = trace_fn_trace, - .latency_trace = trace_fn_latency, .raw = trace_fn_raw, .hex = trace_fn_hex, .binary = trace_fn_bin, @@ -705,7 +679,6 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, static struct trace_event trace_ctx_event = { .type = TRACE_CTX, .trace = trace_ctx_print, - .latency_trace = trace_ctx_print, .raw = trace_ctx_raw, .hex = trace_ctx_hex, .binary = trace_ctxwake_bin, @@ -714,7 +687,6 @@ static struct trace_event trace_ctx_event = { static struct trace_event trace_wake_event = { .type = TRACE_WAKE, .trace = trace_wake_print, - .latency_trace = trace_wake_print, .raw = trace_wake_raw, .hex = trace_wake_hex, .binary = trace_ctxwake_bin, @@ -770,7 +742,6 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter, static struct trace_event trace_special_event = { .type = TRACE_SPECIAL, .trace = trace_special_print, - .latency_trace = trace_special_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -808,7 +779,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, static struct trace_event trace_stack_event = { .type = TRACE_STACK, .trace = trace_stack_print, - .latency_trace = trace_stack_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -838,7 +808,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, static struct trace_event trace_user_stack_event = { .type = TRACE_USER_STACK, .trace = trace_user_stack_print, - .latency_trace = trace_user_stack_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -883,7 +852,6 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) static struct trace_event trace_print_event = { .type = TRACE_PRINT, .trace = trace_print_print, - .latency_trace = trace_print_print, .raw = trace_print_raw, }; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 551a25a7221..8a34d688ed6 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -10,7 +10,6 @@ struct trace_event { struct hlist_node node; int type; trace_print_func trace; - trace_print_func latency_trace; trace_print_func raw; trace_print_func hex; trace_print_func binary; -- cgit v1.2.3-70-g09d2 From ef18012b248b47ec9a12c3a83ca5e99782d39c5d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 14:10:56 -0400 Subject: tracing: remove funky whitespace in the trace code Impact: clean up There existed a lot of 's in the tracing code. This patch removes them. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 16 +++--- kernel/trace/blktrace.c | 10 ++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_branch.c | 2 +- kernel/trace/trace_events_stage_3.h | 98 ++++++++++++++++++------------------ kernel/trace/trace_export.c | 2 +- kernel/trace/trace_functions_graph.c | 6 +-- kernel/trace/trace_output.c | 14 +++--- kernel/trace/trace_workqueue.c | 6 +-- 9 files changed, 78 insertions(+), 78 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 119ece224c2..d35a7ee7611 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -178,8 +178,8 @@ static inline void tracepoint_synchronize_unregister(void) * * prototype, declare it via TP_PROTO(): * * * - * TP_PROTO(struct rq *rq, struct task_struct *prev, - * struct task_struct *next), + * TP_PROTO(struct rq *rq, struct task_struct *prev, + * struct task_struct *next), * * * * * Define the call signature of the 'function'. @@ -187,7 +187,7 @@ static inline void tracepoint_synchronize_unregister(void) * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) * * * - * TP_ARGS(rq, prev, next), + * TP_ARGS(rq, prev, next), * * * * * Fast binary tracing: define the trace record via @@ -229,13 +229,13 @@ static inline void tracepoint_synchronize_unregister(void) * * happens, on an active tracepoint. * * * - * TP_fast_assign( - * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - * __entry->prev_pid = prev->pid; - * __entry->prev_prio = prev->prio; + * TP_fast_assign( + * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + * __entry->prev_pid = prev->pid; + * __entry->prev_prio = prev->prio; * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); * __entry->next_pid = next->pid; - * __entry->next_prio = next->prio; + * __entry->next_prio = next->prio; * ) * * * diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e39679a72a3..bec69d3678c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -33,7 +33,7 @@ static struct trace_array *blk_tr; static int __read_mostly blk_tracer_enabled; /* Select an alternative, minimalistic output than the original one */ -#define TRACE_BLK_OPT_CLASSIC 0x1 +#define TRACE_BLK_OPT_CLASSIC 0x1 static struct tracer_opt blk_tracer_opts[] = { /* Default disable the minimalistic output */ @@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); /** * blk_trace_ioctl: - handle the ioctls associated with tracing * @bdev: the block device - * @cmd: the ioctl cmd + * @cmd: the ioctl cmd * @arg: the argument data, if any * **/ @@ -1128,9 +1128,9 @@ static void blk_tracer_reset(struct trace_array *tr) static struct { const char *act[2]; - int (*print)(struct trace_seq *s, const struct trace_entry *ent); + int (*print)(struct trace_seq *s, const struct trace_entry *ent); } what2act[] __read_mostly = { - [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, + [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, @@ -1229,7 +1229,7 @@ static struct tracer blk_tracer __read_mostly = { }; static struct trace_event trace_blk_event = { - .type = TRACE_BLK, + .type = TRACE_BLK, .trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cc94f864248..8c6a902db40 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -799,7 +799,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->tgid = (tsk) ? tsk->tgid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index aaa0755268b..ad8c22efff4 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -157,7 +157,7 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, static struct trace_event trace_branch_event = { - .type = TRACE_BRANCH, + .type = TRACE_BRANCH, .trace = trace_branch_print, }; diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6ee1de59f19..ae2e323df0c 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -5,23 +5,23 @@ * * static void ftrace_event_(proto) * { - * event_trace_printk(_RET_IP_, ": " ); + * event_trace_printk(_RET_IP_, ": " ); * } * * static int ftrace_reg_event_(void) * { - * int ret; + * int ret; * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; * } * * static void ftrace_unreg_event_(void) * { - * unregister_trace_(ftrace_event_); + * unregister_trace_(ftrace_event_); * } * * For those macros defined with TRACE_FORMAT: @@ -29,9 +29,9 @@ * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, * } * * @@ -41,66 +41,66 @@ * * static void ftrace_raw_event_(proto) * { - * struct ring_buffer_event *event; - * struct ftrace_raw_ *entry; <-- defined in stage 1 - * unsigned long irq_flags; - * int pc; - * - * local_save_flags(irq_flags); - * pc = preempt_count(); - * - * event = trace_current_buffer_lock_reserve(event_.id, - * sizeof(struct ftrace_raw_), - * irq_flags, pc); - * if (!event) - * return; - * entry = ring_buffer_event_data(event); - * - * ; <-- Here we assign the entries by the __field and + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the __field and * __array macros. * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_(void) * { - * int ret; + * int ret; * - * ret = register_trace_(ftrace_raw_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; * } * * static void ftrace_unreg_event_(void) * { - * unregister_trace_(ftrace_raw_event_); + * unregister_trace_(ftrace_raw_event_); * } * * static struct trace_event ftrace_event_type_ = { - * .trace = ftrace_raw_output_, <-- stage 2 + * .trace = ftrace_raw_output_, <-- stage 2 * }; * * static int ftrace_raw_init_event_(void) * { - * int id; + * int id; * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; * } * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", + * .name = "", * .system = "", - * .raw_init = ftrace_raw_init_event_, - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, + * .raw_init = ftrace_raw_init_event_, + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, * .show_format = ftrace_format_, * } * @@ -138,7 +138,7 @@ _TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ @@ -163,7 +163,7 @@ static void ftrace_raw_event_##call(proto) \ pc = preempt_count(); \ \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ + sizeof(struct ftrace_raw_##call), \ irq_flags, pc); \ if (!event) \ return; \ @@ -208,7 +208,7 @@ static int ftrace_raw_init_event_##call(void) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 23ae78430d5..4d9952d3df5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -94,7 +94,7 @@ ftrace_format_##call(struct trace_seq *s) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ + .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ .show_format = ftrace_format_##call, \ diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 453ebd3b636..d1493b853e4 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -841,12 +841,12 @@ static void graph_trace_close(struct trace_iterator *iter) } static struct tracer graph_trace __read_mostly = { - .name = "function_graph", + .name = "function_graph", .open = graph_trace_open, .close = graph_trace_close, .wait_pipe = poll_wait_pipe, - .init = graph_trace_init, - .reset = graph_trace_reset, + .init = graph_trace_init, + .reset = graph_trace_reset, .print_line = print_graph_function, .print_header = print_graph_headers, .flags = &tracer_flags, diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ef8fd661b21..491832af9ba 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -565,7 +565,7 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) } static struct trace_event trace_fn_event = { - .type = TRACE_FN, + .type = TRACE_FN, .trace = trace_fn_trace, .raw = trace_fn_raw, .hex = trace_fn_hex, @@ -696,7 +696,7 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, } static struct trace_event trace_ctx_event = { - .type = TRACE_CTX, + .type = TRACE_CTX, .trace = trace_ctx_print, .raw = trace_ctx_raw, .hex = trace_ctx_hex, @@ -704,7 +704,7 @@ static struct trace_event trace_ctx_event = { }; static struct trace_event trace_wake_event = { - .type = TRACE_WAKE, + .type = TRACE_WAKE, .trace = trace_wake_print, .raw = trace_wake_raw, .hex = trace_wake_hex, @@ -759,7 +759,7 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter, } static struct trace_event trace_special_event = { - .type = TRACE_SPECIAL, + .type = TRACE_SPECIAL, .trace = trace_special_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -796,7 +796,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, } static struct trace_event trace_stack_event = { - .type = TRACE_STACK, + .type = TRACE_STACK, .trace = trace_stack_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -825,7 +825,7 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, } static struct trace_event trace_user_stack_event = { - .type = TRACE_USER_STACK, + .type = TRACE_USER_STACK, .trace = trace_user_stack_print, .raw = trace_special_print, .hex = trace_special_hex, @@ -879,7 +879,7 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) static struct trace_event trace_print_event = { - .type = TRACE_PRINT, + .type = TRACE_PRINT, .trace = trace_print_print, .raw = trace_print_raw, }; diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 4664990fe9c..e542483df62 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -19,14 +19,14 @@ struct cpu_workqueue_stats { /* Useful to know if we print the cpu headers */ bool first_entry; int cpu; - pid_t pid; + pid_t pid; /* Can be inserted from interrupt or user context, need to be atomic */ - atomic_t inserted; + atomic_t inserted; /* * Don't need to be atomic, works are serialized in a single workqueue thread * on a single CPU. */ - unsigned int executed; + unsigned int executed; }; /* List of workqueue threads on one cpu */ -- cgit v1.2.3-70-g09d2 From 4ca530852346be239b7c19e7bec5d2b78855bebe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Mar 2009 19:20:15 -0400 Subject: tracing: protect reader of cmdline output Impact: fix to one cause of incorrect comm outputs in trace The spinlock only protected the creation of a comm <=> pid pair. But it was possible that a reader could look up a pid, and get the wrong comm because it had no locking. This also required changing trace_find_cmdline to copy the comm cache and not just send back a pointer to it. Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 23 ++++++++++++++++++----- kernel/trace/trace.c | 20 ++++++++++++-------- kernel/trace/trace.h | 2 +- kernel/trace/trace_functions_graph.c | 12 ++++++------ kernel/trace/trace_output.c | 18 ++++++++++++------ 5 files changed, 49 insertions(+), 26 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1f32e4edf49..b171778e386 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1027,7 +1027,9 @@ static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { - const char *cmd = trace_find_cmdline(ent->pid); + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); if (t_sec(ent)) return trace_seq_printf(s, "%llu + %u [%s]\n", @@ -1057,19 +1059,30 @@ static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) { - return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid)); + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); + + return trace_seq_printf(s, "[%s]\n", cmd); } static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) { - return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid), - get_pdu_int(ent)); + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); + + return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); } static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) { + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); + return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), - get_pdu_int(ent), trace_find_cmdline(ent->pid)); + get_pdu_int(ent), cmd); } /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index efe3202c020..2796bd2b17e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,25 +770,29 @@ static void trace_save_cmdline(struct task_struct *tsk) __raw_spin_unlock(&trace_cmdline_lock); } -char *trace_find_cmdline(int pid) +void trace_find_cmdline(int pid, char comm[]) { - char *cmdline = "<...>"; unsigned map; - if (!pid) - return ""; + if (!pid) { + strcpy(comm, ""); + return; + } - if (pid > PID_MAX_DEFAULT) - goto out; + if (pid > PID_MAX_DEFAULT) { + strcpy(comm, "<...>"); + return; + } + __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map >= SAVED_CMDLINES) goto out; - cmdline = saved_cmdlines[map]; + strcpy(comm, saved_cmdlines[map]); out: - return cmdline; + __raw_spin_unlock(&trace_cmdline_lock); } void tracing_record_cmdline(struct task_struct *tsk) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 56ce34d90b0..b0ecad8ecc3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -547,7 +547,7 @@ struct tracer_switch_ops { }; #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ -extern char *trace_find_cmdline(int pid); +extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4c388607ed6..6004ccac2dd 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -190,15 +190,15 @@ print_graph_cpu(struct trace_seq *s, int cpu) static enum print_line_t print_graph_proc(struct trace_seq *s, pid_t pid) { - int i; - int ret; - int len; - char comm[8]; - int spaces = 0; + char comm[TASK_COMM_LEN]; /* sign + log10(MAX_INT) + '\0' */ char pid_str[11]; + int spaces = 0; + int ret; + int len; + int i; - strncpy(comm, trace_find_cmdline(pid), 7); + trace_find_cmdline(pid, comm); comm[7] = '\0'; sprintf(pid_str, "%d", pid); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ea9d3b410c7..6a4c9dea191 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -309,9 +309,9 @@ static int lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; - char *comm; + char comm[TASK_COMM_LEN]; - comm = trace_find_cmdline(entry->pid); + trace_find_cmdline(entry->pid, comm); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; @@ -346,10 +346,12 @@ int trace_print_context(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - char *comm = trace_find_cmdline(entry->pid); unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned long secs = (unsigned long)t; + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", comm, entry->pid, iter->cpu, secs, usec_rem); @@ -372,7 +374,10 @@ int trace_print_lat_context(struct trace_iterator *iter) rel_usecs = ns2usecs(next_ts - iter->ts); if (verbose) { - char *comm = trace_find_cmdline(entry->pid); + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, @@ -577,14 +582,15 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) { struct ctx_switch_entry *field; - char *comm; + char comm[TASK_COMM_LEN]; int S, T; + trace_assign_type(field, iter->ent); T = task_state_char(field->next_state); S = task_state_char(field->prev_state); - comm = trace_find_cmdline(field->next_pid); + trace_find_cmdline(field->next_pid, comm); if (!trace_seq_printf(&iter->seq, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", field->prev_pid, -- cgit v1.2.3-70-g09d2 From 1a17662ea033674a58bad3603531b0b5d42572f6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 09:47:30 +0800 Subject: blktrace: fix possible memory leak When we failed to create "block" debugfs dir, we should do some cleanups. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C2F5B2.8000800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b171778e386..fb3bc53835d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -432,7 +432,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!blk_tree_root) { blk_tree_root = debugfs_create_dir("block", NULL); if (!blk_tree_root) - return -ENOMEM; + goto err; } dir = debugfs_create_dir(buts->name, blk_tree_root); -- cgit v1.2.3-70-g09d2 From 5006ea73f38caef6065d1136808413813271633f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 09:48:03 +0800 Subject: blktrace: make blk_tracer_enabled a bool flag It doesn't have to be a counter, and it can be a bool flag instead. Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt Acked-by: Frederic Weisbecker LKML-Reference: <49C2F5D3.8090104@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fb3bc53835d..73845b7968b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -30,7 +30,7 @@ static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; -static int __read_mostly blk_tracer_enabled; +static bool blk_tracer_enabled __read_mostly; /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr) { blk_tr = tr; blk_tracer_start(tr); - mutex_lock(&blk_probe_mutex); - blk_tracer_enabled++; - mutex_unlock(&blk_probe_mutex); + blk_tracer_enabled = true; return 0; } @@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr) if (!atomic_read(&blk_probes_ref)) return; - mutex_lock(&blk_probe_mutex); - blk_tracer_enabled--; - WARN_ON(blk_tracer_enabled < 0); - mutex_unlock(&blk_probe_mutex); - + blk_tracer_enabled = false; blk_tracer_stop(tr); } -- cgit v1.2.3-70-g09d2 From 3c289ba7c320560ee74979a8895141c829046a2d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 09:48:26 +0800 Subject: blktrace: remove blk_probe_mutex blk_register_tracepoints() always returns 0, so make it return void, thus we don't need to use blk_probe_mutex to protect blk_probes_ref. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C2F5EA.8060606@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 73845b7968b..223b92e77b3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static DEFINE_MUTEX(blk_probe_mutex); static atomic_t blk_probes_ref = ATOMIC_INIT(0); -static int blk_register_tracepoints(void); +static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); /* @@ -256,10 +255,8 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); - mutex_lock(&blk_probe_mutex); if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); - mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -471,13 +468,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - mutex_lock(&blk_probe_mutex); - if (atomic_add_return(1, &blk_probes_ref) == 1) { - ret = blk_register_tracepoints(); - if (ret) - goto probe_err; - } - mutex_unlock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) + blk_register_tracepoints(); ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); @@ -487,9 +479,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; -probe_err: - atomic_dec(&blk_probes_ref); - mutex_unlock(&blk_probe_mutex); err: if (bt) { if (bt->msg_file) @@ -863,7 +852,7 @@ void blk_add_driver_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_add_driver_data); -static int blk_register_tracepoints(void) +static void blk_register_tracepoints(void) { int ret; @@ -901,7 +890,6 @@ static int blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); - return 0; } static void blk_unregister_tracepoints(void) @@ -1099,11 +1087,8 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { - mutex_lock(&blk_probe_mutex); if (atomic_add_return(1, &blk_probes_ref) == 1) - if (blk_register_tracepoints()) - atomic_dec(&blk_probes_ref); - mutex_unlock(&blk_probe_mutex); + blk_register_tracepoints(); trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } @@ -1118,10 +1103,8 @@ static int blk_tracer_init(struct trace_array *tr) static void blk_tracer_stop(struct trace_array *tr) { trace_flags |= TRACE_ITER_CONTEXT_INFO; - mutex_lock(&blk_probe_mutex); if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); - mutex_unlock(&blk_probe_mutex); } static void blk_tracer_reset(struct trace_array *tr) -- cgit v1.2.3-70-g09d2 From cbe28296eb1ac441b35cf45804d0ae808add7dd1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 09:48:47 +0800 Subject: blktrace: don't increase blk_probes_ref if failed to setup blk trace do_blk_trace_setup() may return EBUSY, but the current code doesn't decrease blk_probes_ref in this case. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C2F5FF.80002@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 223b92e77b3..11e7c8d9d22 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -468,9 +468,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - if (atomic_add_return(1, &blk_probes_ref) == 1) - blk_register_tracepoints(); - ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -478,6 +475,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, goto err; } + if (atomic_add_return(1, &blk_probes_ref) == 1) + blk_register_tracepoints(); + return 0; err: if (bt) { -- cgit v1.2.3-70-g09d2 From 15152e448b693fa41de40f1e40ffbe717a3aab88 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 09:49:08 +0800 Subject: blktrace: report EBUSY correctly blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL, but blk_trace_setup_queue() doesn't return EBUSY if q->blk_trace != NULL. # echo 0 > sdaX/trace/enable # echo 0 > sdaX/trace/enable bash: echo: write error: Invalid argument # echo 1 > sdaX/trace/enable # echo 1 > sdaX/trace/enable (should return EBUSY) Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C2F614.2010101@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 11e7c8d9d22..14986afdbc1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1260,12 +1260,10 @@ static int blk_trace_remove_queue(struct request_queue *q) static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) { struct blk_trace *old_bt, *bt = NULL; - int ret; - ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) - goto err; + return -ENOMEM; bt->dev = dev; bt->act_mask = (u16)-1; @@ -1276,11 +1274,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) if (old_bt != NULL) { (void)xchg(&q->blk_trace, old_bt); kfree(bt); - ret = -EBUSY; + return -EBUSY; } + return 0; -err: - return ret; } /* -- cgit v1.2.3-70-g09d2 From cd649b8bb830d65c57c3c8b98d57b5402256d8bd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 11:33:55 +0800 Subject: blktrace: remove sysfs_blk_trace_enable_show/store() sysfs_blk_trace_enable_show()/store() share most of code with sysfs_blk_trace_attr_show()/store(). Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C30EA3.1060004@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 92 ++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 70 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 14986afdbc1..dfee6f91517 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1284,72 +1284,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) * sysfs interface to enable and configure tracing */ -static ssize_t sysfs_blk_trace_enable_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - struct block_device *bdev; - ssize_t ret = -ENXIO; - - lock_kernel(); - bdev = bdget(part_devt(p)); - if (bdev != NULL) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q != NULL) { - mutex_lock(&bdev->bd_mutex); - ret = sprintf(buf, "%u\n", !!q->blk_trace); - mutex_unlock(&bdev->bd_mutex); - } - - bdput(bdev); - } - - unlock_kernel(); - return ret; -} - -static ssize_t sysfs_blk_trace_enable_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct block_device *bdev; - struct request_queue *q; - struct hd_struct *p; - int value; - ssize_t ret = -ENXIO; - - if (count == 0 || sscanf(buf, "%d", &value) != 1) - goto out; - - lock_kernel(); - p = dev_to_part(dev); - bdev = bdget(part_devt(p)); - if (bdev == NULL) - goto out_unlock_kernel; - - q = bdev_get_queue(bdev); - if (q == NULL) - goto out_bdput; - - mutex_lock(&bdev->bd_mutex); - if (value) - ret = blk_trace_setup_queue(q, bdev->bd_dev); - else - ret = blk_trace_remove_queue(q); - mutex_unlock(&bdev->bd_mutex); - - if (ret == 0) - ret = count; -out_bdput: - bdput(bdev); -out_unlock_kernel: - unlock_kernel(); -out: - return ret; -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -1361,8 +1295,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, sysfs_blk_trace_attr_show, \ sysfs_blk_trace_attr_store) -static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, - sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); +static BLK_TRACE_DEVICE_ATTR(enable); static BLK_TRACE_DEVICE_ATTR(act_mask); static BLK_TRACE_DEVICE_ATTR(pid); static BLK_TRACE_DEVICE_ATTR(start_lba); @@ -1447,6 +1380,12 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q == NULL) goto out_bdput; mutex_lock(&bdev->bd_mutex); + + if (attr == &dev_attr_enable) { + ret = sprintf(buf, "%u\n", !!q->blk_trace); + goto out_unlock_bdev; + } + if (q->blk_trace == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) @@ -1457,6 +1396,8 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); else if (attr == &dev_attr_end_lba) ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + +out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); @@ -1499,6 +1440,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out_bdput; mutex_lock(&bdev->bd_mutex); + + if (attr == &dev_attr_enable) { + if (value) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + else + ret = blk_trace_remove_queue(q); + goto out_unlock_bdev; + } + ret = 0; if (q->blk_trace == NULL) ret = blk_trace_setup_queue(q, bdev->bd_dev); @@ -1512,13 +1462,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, q->blk_trace->start_lba = value; else if (attr == &dev_attr_end_lba) q->blk_trace->end_lba = value; - ret = count; } + +out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); out_unlock_kernel: unlock_kernel(); out: - return ret; + return ret ? ret : count; } + -- cgit v1.2.3-70-g09d2 From b125130b22d67f249beba10b71a254558b5279d0 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Mar 2009 10:34:00 +0800 Subject: blktrace: avoid accessing NULL bdev->bd_disk bdev->bd_disk can be NULL, if the block device is not opened. Try this against an unmounted partition, and you'll see NULL dereference: # echo 1 > /sys/block/sda/sda5/enable Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt LKML-Reference: <49C30098.6080107@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index dfee6f91517..108f4f7715a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1362,6 +1362,14 @@ static int blk_str2act_mask(const char *str) return mask; } +static struct request_queue *blk_trace_get_queue(struct block_device *bdev) +{ + if (bdev->bd_disk == NULL) + return NULL; + + return bdev_get_queue(bdev); +} + static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1376,9 +1384,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (bdev == NULL) goto out_unlock_kernel; - q = bdev_get_queue(bdev); + q = blk_trace_get_queue(bdev); if (q == NULL) goto out_bdput; + mutex_lock(&bdev->bd_mutex); if (attr == &dev_attr_enable) { @@ -1435,7 +1444,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (bdev == NULL) goto out_unlock_kernel; - q = bdev_get_queue(bdev); + q = blk_trace_get_queue(bdev); if (q == NULL) goto out_bdput; -- cgit v1.2.3-70-g09d2 From e4955c9986a27bb47ddeb6cd55803053f547e2e9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Mar 2009 16:04:37 +0800 Subject: blktrace: mark ddir_act[] const Impact: cleanup ddir_act and what2act always stay immutable. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <49C89415.5080503@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 108f4f7715a..1ffcbd4ac45 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -147,8 +147,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, /* * Data direction bit lookup */ -static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), - BLK_TC_ACT(BLK_TC_WRITE) }; +static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), + BLK_TC_ACT(BLK_TC_WRITE) }; /* The ilog2() calls fall out because they're constant */ #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ @@ -1116,10 +1116,10 @@ static void blk_tracer_reset(struct trace_array *tr) blk_tracer_stop(tr); } -static struct { +static const struct { const char *act[2]; int (*print)(struct trace_seq *s, const struct trace_entry *ent); -} what2act[] __read_mostly = { +} what2act[] = { [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, -- cgit v1.2.3-70-g09d2 From 65796348e09880e12b97267d39b8857c758440a6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Mar 2009 16:05:06 +0800 Subject: blktrace: fix wrong calculation of RWBS Impact: fix the output of IO type category characters Trace categories are the upper 16 bits, not the lower 16 bits. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <49C89432.8010805@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1ffcbd4ac45..9af41430ee5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -922,23 +922,24 @@ static void blk_unregister_tracepoints(void) static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) { int i = 0; + int tc = t->action >> BLK_TC_SHIFT; - if (t->action & BLK_TC_DISCARD) + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; - else if (t->action & BLK_TC_WRITE) + else if (tc & BLK_TC_WRITE) rwbs[i++] = 'W'; else if (t->bytes) rwbs[i++] = 'R'; else rwbs[i++] = 'N'; - if (t->action & BLK_TC_AHEAD) + if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (t->action & BLK_TC_BARRIER) + if (tc & BLK_TC_BARRIER) rwbs[i++] = 'B'; - if (t->action & BLK_TC_SYNC) + if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; - if (t->action & BLK_TC_META) + if (tc & BLK_TC_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; -- cgit v1.2.3-70-g09d2 From e0dc81bec0927fa0c8aabc521793161909eef7a5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Mar 2009 16:05:51 +0800 Subject: blktrace: fix t_error() Impact: fix error flag output t_error() should return t->error but not t->sector. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <49C8945F.5020802@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9af41430ee5..f69f8bd8bef 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -968,7 +968,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent) static inline __u16 t_error(const struct trace_entry *ent) { - return te_blk_io_trace(ent)->sector; + return te_blk_io_trace(ent)->error; } static __u64 get_pdu_int(const struct trace_entry *ent) -- cgit v1.2.3-70-g09d2 From 093419971e03362a00f499960557c119982ea09f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Mar 2009 17:43:30 +0800 Subject: blktrace: print human-readable act_mask Impact: new feature, allow symbolic values in /debug/tracing/act_mask Print stringified act_mask instead of hex value: # cat act_mask read,write,barrier,sync,queue,requeue,issue,complete,fs,pc,ahead,meta, discard,drv_data # echo "meta,write" > act_mask # cat act_mask write,meta Also: - make act_mask accept "ahead", "meta", "discard" and "drv_data" - use strsep() instead of strchr() to parse user input - return -EINVAL if a token is not found in the mask map - fix a bug that 'value' is unsigned, so it can < 0 - propagate error value of blk_trace_mask2str() to userspace, but not always return -ENXIO. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <49C8AB42.1000802@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 103 ++++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 38 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index f69f8bd8bef..6fb274f5f34 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1316,53 +1316,77 @@ struct attribute_group blk_trace_attr_group = { .attrs = blk_trace_attrs, }; -static int blk_str2act_mask(const char *str) +static const struct { + int mask; + const char *str; +} mask_maps[] = { + { BLK_TC_READ, "read" }, + { BLK_TC_WRITE, "write" }, + { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_SYNC, "sync" }, + { BLK_TC_QUEUE, "queue" }, + { BLK_TC_REQUEUE, "requeue" }, + { BLK_TC_ISSUE, "issue" }, + { BLK_TC_COMPLETE, "complete" }, + { BLK_TC_FS, "fs" }, + { BLK_TC_PC, "pc" }, + { BLK_TC_AHEAD, "ahead" }, + { BLK_TC_META, "meta" }, + { BLK_TC_DISCARD, "discard" }, + { BLK_TC_DRV_DATA, "drv_data" }, +}; + +static int blk_trace_str2mask(const char *str) { + int i; int mask = 0; - char *copy = kstrdup(str, GFP_KERNEL), *s; + char *s, *token; - if (copy == NULL) + s = kstrdup(str, GFP_KERNEL); + if (s == NULL) return -ENOMEM; - - s = strstrip(copy); + s = strstrip(s); while (1) { - char *sep = strchr(s, ','); - - if (sep != NULL) - *sep = '\0'; - - if (strcasecmp(s, "barrier") == 0) - mask |= BLK_TC_BARRIER; - else if (strcasecmp(s, "complete") == 0) - mask |= BLK_TC_COMPLETE; - else if (strcasecmp(s, "fs") == 0) - mask |= BLK_TC_FS; - else if (strcasecmp(s, "issue") == 0) - mask |= BLK_TC_ISSUE; - else if (strcasecmp(s, "pc") == 0) - mask |= BLK_TC_PC; - else if (strcasecmp(s, "queue") == 0) - mask |= BLK_TC_QUEUE; - else if (strcasecmp(s, "read") == 0) - mask |= BLK_TC_READ; - else if (strcasecmp(s, "requeue") == 0) - mask |= BLK_TC_REQUEUE; - else if (strcasecmp(s, "sync") == 0) - mask |= BLK_TC_SYNC; - else if (strcasecmp(s, "write") == 0) - mask |= BLK_TC_WRITE; - - if (sep == NULL) + token = strsep(&s, ","); + if (token == NULL) break; - s = sep + 1; + if (*token == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { + if (strcasecmp(token, mask_maps[i].str) == 0) { + mask |= mask_maps[i].mask; + break; + } + } + if (i == ARRAY_SIZE(mask_maps)) { + mask = -EINVAL; + break; + } } - kfree(copy); + kfree(s); return mask; } +static ssize_t blk_trace_mask2str(char *buf, int mask) +{ + int i; + char *p = buf; + + for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { + if (mask & mask_maps[i].mask) { + p += sprintf(p, "%s%s", + (p == buf) ? "" : ",", mask_maps[i].str); + } + } + *p++ = '\n'; + + return p - buf; +} + static struct request_queue *blk_trace_get_queue(struct block_device *bdev) { if (bdev->bd_disk == NULL) @@ -1399,7 +1423,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q->blk_trace == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); else if (attr == &dev_attr_pid) ret = sprintf(buf, "%u\n", q->blk_trace->pid); else if (attr == &dev_attr_start_lba) @@ -1424,7 +1448,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct request_queue *q; struct hd_struct *p; u64 value; - ssize_t ret = -ENXIO; + ssize_t ret = -EINVAL; if (count == 0) goto out; @@ -1432,13 +1456,16 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (attr == &dev_attr_act_mask) { if (sscanf(buf, "%llx", &value) != 1) { /* Assume it is a list of trace category names */ - value = blk_str2act_mask(buf); - if (value < 0) + ret = blk_trace_str2mask(buf); + if (ret < 0) goto out; + value = ret; } } else if (sscanf(buf, "%llu", &value) != 1) goto out; + ret = -ENXIO; + lock_kernel(); p = dev_to_part(dev); bdev = bdget(part_devt(p)); -- cgit v1.2.3-70-g09d2 From 6c051ce0307526adec32a847f0daa1af2124f0a9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 25 Mar 2009 17:18:56 +0800 Subject: blktrace: fix timestamp in binary output I found the timestamp is wrong: # echo bin > trace_option # echo blk > current_tracer # cat trace_pipe | blkparse -i - 8,0 0 0 0.000000000 504 A W ... ... 8,7 1 0 0.008534097 0 C R ... (should be 8.534097xxx) user-space blkparse expects the timestamp to be nanosecond. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6fb274f5f34..ee7a8bb8b1e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1168,7 +1168,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) const int offset = offsetof(struct blk_io_trace, sector); struct blk_io_trace old = { .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, - .time = ns2usecs(iter->ts), + .time = iter->ts, }; if (!trace_seq_putmem(s, &old, offset)) -- cgit v1.2.3-70-g09d2 From b5230b56ee6caeb27cedb7753c0c319646383bb4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 25 Mar 2009 17:19:33 +0800 Subject: blktrace: fix a race when creating blk_tree_root in debugfs t1 t2 ------ ------ do_blk_trace_setup() do_blk_trace_setup() if (!blk_tree_root) { if (!blk_tree_root) blk_tree_root = create_dir() blk_tree_root = create_dir(); (now blk_tree_root == NULL) ... dir = create_dir(name, blk_tree_root); Due to this race, t1 will create 'dir' in /debugfs but not /debugfs/block. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ee7a8bb8b1e..95f89faca73 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -426,11 +426,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = -ENOENT; + mutex_lock(&blk_tree_mutex); if (!blk_tree_root) { blk_tree_root = debugfs_create_dir("block", NULL); - if (!blk_tree_root) + if (!blk_tree_root) { + mutex_unlock(&blk_tree_mutex); goto err; + } } + mutex_unlock(&blk_tree_mutex); dir = debugfs_create_dir(buts->name, blk_tree_root); -- cgit v1.2.3-70-g09d2 From 5554720482a631702146a959db22fe417740e0a6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 25 Mar 2009 17:21:26 +0800 Subject: blktrace: fix the original blktrace Currently the original blktrace, which is using relay and is used via ioctl, is broken. You can use ftrace to see the output of blktrace, but user-space blktrace is unusable. It's broken by "blktrace: add ftrace plugin" (c71a896154119f4ca9e89d6078f5f63ad60ef199) - if (unlikely(bt->trace_state != Blktrace_running)) + if (unlikely(bt->trace_state != Blktrace_running || !blk_tracer_enabled)) return; With this patch, both ioctl and ftrace can be used, but of course you can't use both of them at the same time. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 95f89faca73..a7f7ff5db38 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -110,7 +110,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) unsigned long flags; char *buf; - if (blk_tr) { + if (blk_tracer_enabled) { va_start(args, fmt); ftrace_vprintk(fmt, args); va_end(args); @@ -169,7 +169,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, pid_t pid; int cpu, pc = 0; - if (unlikely(bt->trace_state != Blktrace_running || + if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled)) return; @@ -185,7 +185,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; cpu = raw_smp_processor_id(); - if (blk_tr) { + if (blk_tracer_enabled) { tracing_record_cmdline(current); pc = preempt_count(); @@ -235,7 +235,7 @@ record_it: if (pdu_len) memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); - if (blk_tr) { + if (blk_tracer_enabled) { trace_buffer_unlock_commit(blk_tr, event, 0, pc); return; } @@ -267,8 +267,7 @@ int blk_trace_remove(struct request_queue *q) if (!bt) return -EINVAL; - if (bt->trace_state == Blktrace_setup || - bt->trace_state == Blktrace_stopped) + if (bt->trace_state != Blktrace_running) blk_trace_cleanup(bt); return 0; @@ -1273,7 +1272,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) bt->dev = dev; bt->act_mask = (u16)-1; bt->end_lba = -1ULL; - bt->trace_state = Blktrace_running; old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { -- cgit v1.2.3-70-g09d2 From eb08f8eb0673d9c1e62b69ad1b41593e73c40467 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Mar 2009 16:05:27 +0800 Subject: blktrace: fix off-by-one bug 'what' is used as the index of array what2act, so it can't >= the array size. Signed-off-by: Li Zefan Acked-by: Jens Axboe Acked-by: Arnaldo Carvalho de Melo Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index a7f7ff5db38..d43cdac7c75 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1152,7 +1152,7 @@ static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, if (!trace_print_context(iter)) return TRACE_TYPE_PARTIAL_LINE; - if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ret = trace_seq_printf(s, "Bad pc action %x\n", what); else { const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); @@ -1199,7 +1199,7 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) t = (const struct blk_io_trace *)iter->ent; what = t->action & ((1 << BLK_TC_SHIFT) - 1); - if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); else { const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); -- cgit v1.2.3-70-g09d2 From 35ac51bfe4c293b67ce9f85082ba0b9bc6123c40 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 10:19:46 +0800 Subject: blktrace: make classic output more classic Impact: fix ftrace plugin timestamp output In the classic user-space blktrace, the output timestamp is sec.nsec not sec.usec. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d43cdac7c75..5b28f0f119c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -994,8 +994,8 @@ static void get_pdu_remap(const struct trace_entry *ent, static int blk_log_action_iter(struct trace_iterator *iter, const char *act) { char rwbs[6]; - unsigned long long ts = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(ts, USEC_PER_SEC); + unsigned long long ts = iter->ts; + unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; const struct trace_entry *ent = iter->ent; const struct blk_io_trace *t = (const struct blk_io_trace *)ent; @@ -1003,9 +1003,9 @@ static int blk_log_action_iter(struct trace_iterator *iter, const char *act) fill_rwbs(rwbs, t); return trace_seq_printf(&iter->seq, - "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", + "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", MAJOR(t->device), MINOR(t->device), iter->cpu, - secs, usec_rem, ent->pid, act, rwbs); + secs, nsec_rem, ent->pid, act, rwbs); } static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, -- cgit v1.2.3-70-g09d2 From 17ba97e347bec9bbc47a0877c7a098708982129d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 10:20:09 +0800 Subject: blktrace: fix blk_probes_ref chaos Impact: fix mixed ioctl and ftrace-plugin blktrace use refcount bugs ioctl-based blktrace allocates bt and registers tracepoints when ioctl(BLKTRACESETUP), and do all cleanups when ioctl(BLKTRACETEARDOWN). while ftrace-based blktrace allocates/frees bt when: # echo 1/0 > /sys/block/sda/sda1/trace/enable and registers/unregisters tracepoints when: # echo blk/nop > /debugfs/tracing/current_tracer or # echo 1/0 > /debugfs/tracing/tracing_enable The separatation of allocation and registeration causes 2 problems: 1. current user-space blktrace still calls ioctl(TEARDOWN) when ioctl(SETUP) failed: # echo 1 > /sys/block/sda/sda1/trace/enable # blktrace /dev/sda BLKTRACESETUP: Device or resource busy ^C and now blk_probes_ref == -1 2. Another way to make blk_probes_ref == -1: # plugin sdb && mount sdb1 # echo 1 > /sys/block/sdb/sdb1/trace/enable # remove sdb This patch does the allocation and registeration when writing sdaX/trace/enable. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5b28f0f119c..8d6bd12aab1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -478,7 +478,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, goto err; } - if (atomic_add_return(1, &blk_probes_ref) == 1) + if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); return 0; @@ -1091,8 +1091,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { - if (atomic_add_return(1, &blk_probes_ref) == 1) - blk_register_tracepoints(); trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } @@ -1107,15 +1105,10 @@ static int blk_tracer_init(struct trace_array *tr) static void blk_tracer_stop(struct trace_array *tr) { trace_flags |= TRACE_ITER_CONTEXT_INFO; - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); } static void blk_tracer_reset(struct trace_array *tr) { - if (!atomic_read(&blk_probes_ref)) - return; - blk_tracer_enabled = false; blk_tracer_stop(tr); } @@ -1254,6 +1247,9 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + kfree(bt); return 0; } @@ -1280,6 +1276,9 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) return -EBUSY; } + if (atomic_inc_return(&blk_probes_ref) == 1) + blk_register_tracepoints(); + return 0; } -- cgit v1.2.3-70-g09d2 From ad5dd5493a55e462796e42e50a49e76df76fdb05 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 10:20:24 +0800 Subject: blktrace: fix memory leak when freeing struct blk_io_trace Impact: fix mixed ioctl and ftrace-plugin blktrace use memory leak When mixing the use of ioctl-based blktrace and ftrace-based blktrace, we can leak memory in this way: # btrace /dev/sda > /dev/null & # echo 0 > /sys/block/sda/sda1/trace/enable now we leak bt->dropped_file, bt->msg_file, bt->rchan... Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8d6bd12aab1..2f21d776160 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -247,7 +247,7 @@ record_it: static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); -static void blk_trace_cleanup(struct blk_trace *bt) +static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); @@ -255,6 +255,11 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); +} + +static void blk_trace_cleanup(struct blk_trace *bt) +{ + blk_trace_free(bt); if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); } @@ -410,11 +415,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (buts->name[i] == '/') buts->name[i] = '_'; - ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) - goto err; + return -ENOMEM; + ret = -ENOMEM; bt->sequence = alloc_percpu(unsigned long); if (!bt->sequence) goto err; @@ -483,17 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return 0; err: - if (bt) { - if (bt->msg_file) - debugfs_remove(bt->msg_file); - if (bt->dropped_file) - debugfs_remove(bt->dropped_file); - free_percpu(bt->sequence); - free_percpu(bt->msg_data); - if (bt->rchan) - relay_close(bt->rchan); - kfree(bt); - } + blk_trace_free(bt); return ret; } @@ -1091,6 +1086,7 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { + blk_tracer_enabled = true; trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } @@ -1098,18 +1094,17 @@ static int blk_tracer_init(struct trace_array *tr) { blk_tr = tr; blk_tracer_start(tr); - blk_tracer_enabled = true; return 0; } static void blk_tracer_stop(struct trace_array *tr) { + blk_tracer_enabled = false; trace_flags |= TRACE_ITER_CONTEXT_INFO; } static void blk_tracer_reset(struct trace_array *tr) { - blk_tracer_enabled = false; blk_tracer_stop(tr); } @@ -1250,7 +1245,7 @@ static int blk_trace_remove_queue(struct request_queue *q) if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); - kfree(bt); + blk_trace_free(bt); return 0; } -- cgit v1.2.3-70-g09d2 From b6a4b0c3ad4c09c1d37b1040ac8e3ebd1016e10b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 10:21:23 +0800 Subject: blktrace: extract duplidate code Impact: cleanup blk_trace_event_print() and blk_tracer_print_line() share most of the code. text data bss dec hex filename 8605 393 12 9010 2332 kernel/trace/blktrace.o.orig text data bss dec hex filename 8555 393 12 8960 2300 kernel/trace/blktrace.o This patch also prepares for the next patch, that prints out BLK_TN_MESSAGE. Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 62 ++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 32 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2f21d776160..c103b0c2002 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -986,29 +986,31 @@ static void get_pdu_remap(const struct trace_entry *ent, r->sector = be64_to_cpu(sector); } -static int blk_log_action_iter(struct trace_iterator *iter, const char *act) +typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); + +static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { char rwbs[6]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; - const struct trace_entry *ent = iter->ent; - const struct blk_io_trace *t = (const struct blk_io_trace *)ent; + const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); return trace_seq_printf(&iter->seq, "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", MAJOR(t->device), MINOR(t->device), iter->cpu, - secs, nsec_rem, ent->pid, act, rwbs); + secs, nsec_rem, iter->ent->pid, act, rwbs); } -static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, - const char *act) +static int blk_log_action(struct trace_iterator *iter, const char *act) { char rwbs[6]; + const struct blk_io_trace *t = te_blk_io_trace(iter->ent); + fill_rwbs(rwbs, t); - return trace_seq_printf(s, "%3d,%-3d %2s %3s ", + return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", MAJOR(t->device), MINOR(t->device), act, rwbs); } @@ -1129,22 +1131,25 @@ static const struct { [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, }; -static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, - int flags) +static enum print_line_t print_one_line(struct trace_iterator *iter, + bool classic) { struct trace_seq *s = &iter->seq; - const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; - const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); + const struct blk_io_trace *t; + u16 what; int ret; + bool long_act; + blk_log_action_t *log_action; - if (!trace_print_context(iter)) - return TRACE_TYPE_PARTIAL_LINE; + t = te_blk_io_trace(iter->ent); + what = t->action & ((1 << BLK_TC_SHIFT) - 1); + long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + log_action = classic ? &blk_log_action_classic : &blk_log_action; if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ret = trace_seq_printf(s, "Bad pc action %x\n", what); else { - const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); - ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); + ret = log_action(iter, what2act[what].act[long_act]); if (ret) ret = what2act[what].print(s, iter->ent); } @@ -1152,6 +1157,15 @@ static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; } +static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, + int flags) +{ + if (!trace_print_context(iter)) + return TRACE_TYPE_PARTIAL_LINE; + + return print_one_line(iter, false); +} + static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1177,26 +1191,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags) static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) { - const struct blk_io_trace *t; - u16 what; - int ret; - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return TRACE_TYPE_UNHANDLED; - t = (const struct blk_io_trace *)iter->ent; - what = t->action & ((1 << BLK_TC_SHIFT) - 1); - - if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); - else { - const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); - ret = blk_log_action_iter(iter, what2act[what].act[long_act]); - if (ret) - ret = what2act[what].print(&iter->seq, iter->ent); - } - - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + return print_one_line(iter, true); } static struct tracer blk_tracer __read_mostly = { -- cgit v1.2.3-70-g09d2 From 18cea4591a98817697017bcb056a848bae1205df Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 10:21:54 +0800 Subject: blktrace: print out BLK_TN_MESSAGE properly Impact: improve ftrace plugin output Before this patch: # cat trace make-5383 [001] 741.240059: 8,7 P N [make] __trace_note_message: cfq1074 # echo 1 > options/blk_classic # cat trace 8,7 1 0.692221252 0 C W 130411392 + 1024 [0] Bad pc action 6361 Bad pc action 283d # echo 0 > options/blk_classic # echo bin > trace_options # cat trace_pipe | blkparse -i - (can't parse messages generated by blk_add_trace_msg()) After this patch: # cat trace -0 [001] 187.600933: 8,7 C W 145220224 + 8 [0] -0 [001] 187.600946: 8,7 m N cfq1076 complete # echo 1 > options/blk_classic # cat trace 8,7 1 0.256378996 238 I W 113190728 + 8 [pdflush] 8,7 1 0.256378998 238 m N cfq1076 insert_request # echo 0 > options/blk_classic # echo bin > trace_options # cat trace_pipe | blkparse -i - 8,7 1 0 22.973250293 0 C W 102770576 + 8 [0] 8,7 1 0 22.973259213 0 m N cfq1076 complete Signed-off-by: Li Zefan Acked-by: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 80 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 19 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c103b0c2002..947c5b3f90c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -59,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, const void *data, size_t len) { struct blk_io_trace *t; + struct ring_buffer_event *event = NULL; + int pc = 0; + int cpu = smp_processor_id(); + bool blk_tracer = blk_tracer_enabled; + + if (blk_tracer) { + pc = preempt_count(); + event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + sizeof(*t) + len, + 0, pc); + if (!event) + return; + t = ring_buffer_event_data(event); + goto record_it; + } if (!bt->rchan) return; t = relay_reserve(bt->rchan, sizeof(*t) + len); if (t) { - const int cpu = smp_processor_id(); - t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; t->time = ktime_to_ns(ktime_get()); +record_it: t->device = bt->dev; t->action = action; t->pid = pid; t->cpu = cpu; t->pdu_len = len; memcpy((void *) t + sizeof(*t), data, len); + + if (blk_tracer) + trace_buffer_unlock_commit(blk_tr, event, 0, pc); } } @@ -110,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) unsigned long flags; char *buf; - if (blk_tracer_enabled) { - va_start(args, fmt); - ftrace_vprintk(fmt, args); - va_end(args); - return; - } - - if (!bt->msg_data) + if (unlikely(bt->trace_state != Blktrace_running && + !blk_tracer_enabled)) return; local_irq_save(flags); @@ -168,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, unsigned long *sequence; pid_t pid; int cpu, pc = 0; + bool blk_tracer = blk_tracer_enabled; - if (unlikely(bt->trace_state != Blktrace_running && - !blk_tracer_enabled)) + if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[rw & WRITE]; @@ -185,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; cpu = raw_smp_processor_id(); - if (blk_tracer_enabled) { + if (blk_tracer) { tracing_record_cmdline(current); pc = preempt_count(); @@ -235,7 +246,7 @@ record_it: if (pdu_len) memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); - if (blk_tracer_enabled) { + if (blk_tracer) { trace_buffer_unlock_commit(blk_tr, event, 0, pc); return; } @@ -922,6 +933,11 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) int i = 0; int tc = t->action >> BLK_TC_SHIFT; + if (t->action == BLK_TN_MESSAGE) { + rwbs[i++] = 'N'; + goto out; + } + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -939,7 +955,7 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) rwbs[i++] = 'S'; if (tc & BLK_TC_META) rwbs[i++] = 'M'; - +out: rwbs[i] = '\0'; } @@ -1074,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) get_pdu_int(ent), cmd); } +static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) +{ + int ret; + const struct blk_io_trace *t = te_blk_io_trace(ent); + + ret = trace_seq_putmem(s, t + 1, t->pdu_len); + if (ret) + return trace_seq_putc(s, '\n'); + return ret; +} + /* * struct tracer operations */ @@ -1146,6 +1173,13 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, long_act = !!(trace_flags & TRACE_ITER_VERBOSE); log_action = classic ? &blk_log_action_classic : &blk_log_action; + if (t->action == BLK_TN_MESSAGE) { + ret = log_action(iter, long_act ? "message" : "m"); + if (ret) + ret = blk_log_msg(s, iter->ent); + goto out; + } + if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ret = trace_seq_printf(s, "Bad pc action %x\n", what); else { @@ -1153,7 +1187,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, if (ret) ret = what2act[what].print(s, iter->ent); } - +out: return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; } @@ -1253,11 +1287,16 @@ static int blk_trace_remove_queue(struct request_queue *q) static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) { struct blk_trace *old_bt, *bt = NULL; + int ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); + if (!bt->msg_data) + goto free_bt; + bt->dev = dev; bt->act_mask = (u16)-1; bt->end_lba = -1ULL; @@ -1265,14 +1304,17 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { (void)xchg(&q->blk_trace, old_bt); - kfree(bt); - return -EBUSY; + ret = -EBUSY; + goto free_bt; } if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); - return 0; + +free_bt: + blk_trace_free(bt); + return ret; } /* -- cgit v1.2.3-70-g09d2 From a4b3ada83d06554d307dd54abdc62b2e5648264a Mon Sep 17 00:00:00 2001 From: Carl Henrik Lunde Date: Fri, 3 Apr 2009 14:27:15 +0200 Subject: blktrace: NUL-terminate user space messages Impact: fix corrupted blkparse output Make sure messages from user space are NUL-terminated strings, otherwise we could dump random memory to the block trace file. Additionally, I've limited the message to BLK_TN_MAX_MSG-1 characters, because the last character would be stripped by vscnprintf anyway. Signed-off-by: Carl Henrik Lunde Cc: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Alan D. Brunelle" Cc: Steven Rostedt LKML-Reference: <20090403122714.GT5178@kernel.dk> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c..a400b861fad 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG) + if (count > BLK_TN_MAX_MSG - 1) return -EINVAL; - msg = kmalloc(count, GFP_KERNEL); + msg = kmalloc(count + 1, GFP_KERNEL); if (msg == NULL) return -ENOMEM; @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return -EFAULT; } + msg[count] = '\0'; bt = filp->private_data; __trace_note_message(bt, "%s", msg); kfree(msg); -- cgit v1.2.3-70-g09d2 From 7635b03adf3d7b84da7649b81efa91e6ebf11b85 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 3 Apr 2009 15:31:34 +0800 Subject: blktrace: small cleanup in blk_msg_write() Signed-off-by: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Alan D. Brunelle" Cc: Jens Axboe LKML-Reference: <49D5BB56.7000807@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index a400b861fad..73d7860b72e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,7 +327,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG - 1) + if (count >= BLK_TN_MAX_MSG) return -EINVAL; msg = kmalloc(count + 1, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From e2494e1b42ebac402324105d57646489d19e2b01 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Apr 2009 13:43:26 +0800 Subject: blktrace: fix pdu_len when tracing packet command requests Impact: output all of packet commands - not just the first 4 / 8 bytes Since commit d7e3c3249ef23b4617393c69fe464765b4ff1645 ("block: add large command support"), struct request->cmd has been changed from unsinged char cmd[BLK_MAX_CDB] to unsigned char *cmd. v1 -> v2: by: FUJITA Tomonori - make sure rq->cmd_len is always intialized, and then we can use rq->cmd_len instead of BLK_MAX_CDB. Signed-off-by: Li Zefan Acked-by: FUJITA Tomonori Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jens Axboe LKML-Reference: <49D4507E.2060602@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/blk-core.c | 1 + kernel/trace/blktrace.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-core.c b/block/blk-core.c index 29bcfac6c68..859879d0a0b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -132,6 +132,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; + rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->ref_count = 1; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 73d7860b72e..b32ff446c3f 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -643,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - sizeof(rq->cmd), rq->cmd); + rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, -- cgit v1.2.3-70-g09d2 From 9eb85125ce218a8b8d9a7c982510388e227adbec Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 9 Apr 2009 11:19:40 +0800 Subject: blktrace: pass the right pointer to kfree() Impact: fix kfree crash with non-standard act_mask string If passing a string with leading white spaces to strstrip(), the returned ptr != the original ptr. This bug was introduced by me. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49DD694C.8020902@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b32ff446c3f..921ef5d1f0b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str) { int i; int mask = 0; - char *s, *token; + char *buf, *s, *token; - s = kstrdup(str, GFP_KERNEL); - if (s == NULL) + buf = kstrdup(str, GFP_KERNEL); + if (buf == NULL) return -ENOMEM; - s = strstrip(s); + s = strstrip(buf); while (1) { token = strsep(&s, ","); @@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str) break; } } - kfree(s); + kfree(buf); return mask; } -- cgit v1.2.3-70-g09d2