From b78825d608f30a47e3154ab6872a03f0de0c9d45 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 1 Apr 2009 16:18:53 +0800 Subject: blktrace: fix output of unknown events Not all events are pc (packet command) events. An event is a pc event only if it has BLK_TC_PC bit set. Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49D3236D.3090705@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 921ef5d1f0b..e45e1af1356 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1182,7 +1182,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(s, "Bad pc action %x\n", what); + ret = trace_seq_printf(s, "Unknown action %x\n", what); else { ret = log_action(iter, what2act[what].act[long_act]); if (ret) -- cgit v1.2.3-70-g09d2 From 66de7792c02693b49671afe58c771fde3b092fc7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 1 Apr 2009 16:19:19 +0800 Subject: blktrace: fix output of BLK_TC_PC events BLK_TC_PC events should be treated differently with BLK_TC_FS events. Before this patch: # echo 1 > /sys/block/sda/sda1/trace/enable # echo pc > /sys/block/sda/sda1/trace/act_mask # echo blk > /debugfs/tracing/current_tracer # (generate some BLK_TC_PC events) # cat trace bash-2184 [000] 1774.275413: 8,7 I N [bash] bash-2184 [000] 1774.275435: 8,7 D N [bash] bash-2184 [000] 1774.275540: 8,7 I R [bash] bash-2184 [000] 1774.275547: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275580: 8,7 C N 0 [0] bash-2184 [000] 1774.275648: 8,7 I R [bash] bash-2184 [000] 1774.275653: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275682: 8,7 C N 0 [0] bash-2184 [000] 1774.275739: 8,7 I R [bash] bash-2184 [000] 1774.275744: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275771: 8,7 C N 0 [0] bash-2184 [000] 1774.275804: 8,7 I R [bash] bash-2184 [000] 1774.275808: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275836: 8,7 C N 0 [0] After this patch: # cat trace bash-2263 [000] 366.782149: 8,7 I N 0 (00 ..) [bash] bash-2263 [000] 366.782323: 8,7 D N 0 (00 ..) [bash] bash-2263 [000] 366.782557: 8,7 I R 8 (25 00 ..) [bash] bash-2263 [000] 366.782560: 8,7 D R 8 (25 00 ..) [bash] ksoftirqd/0-4 [000] 366.782582: 8,7 C N (25 00 ..) [0] bash-2263 [000] 366.782648: 8,7 I R 8 (5a 00 3f 00) [bash] bash-2263 [000] 366.782650: 8,7 D R 8 (5a 00 3f 00) [bash] ksoftirqd/0-4 [000] 366.782669: 8,7 C N (5a 00 3f 00) [0] bash-2263 [000] 366.782710: 8,7 I R 8 (5a 00 08 00) [bash] bash-2263 [000] 366.782713: 8,7 D R 8 (5a 00 08 00) [bash] ksoftirqd/0-4 [000] 366.782730: 8,7 C N (5a 00 08 00) [0] bash-2263 [000] 366.783375: 8,7 I R 36 (5a 00 08 00) [bash] bash-2263 [000] 366.783379: 8,7 D R 36 (5a 00 08 00) [bash] ksoftirqd/0-4 [000] 366.783404: 8,7 C N (5a 00 08 00) [0] This is what we do with PC events in user-space blktrace. Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49D32387.9040106@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 88 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 8 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e45e1af1356..2b98195b338 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -971,6 +971,16 @@ static inline const void *pdu_start(const struct trace_entry *ent) return te_blk_io_trace(ent) + 1; } +static inline u32 t_action(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->action; +} + +static inline u32 t_bytes(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->bytes; +} + static inline u32 t_sec(const struct trace_entry *ent) { return te_blk_io_trace(ent)->bytes >> 9; @@ -1031,25 +1041,87 @@ static int blk_log_action(struct trace_iterator *iter, const char *act) MAJOR(t->device), MINOR(t->device), act, rwbs); } +static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) +{ + const char *pdu_buf; + int pdu_len; + int i, end, ret; + + pdu_buf = pdu_start(ent); + pdu_len = te_blk_io_trace(ent)->pdu_len; + + if (!pdu_len) + return 1; + + /* find the last zero that needs to be printed */ + for (end = pdu_len - 1; end >= 0; end--) + if (pdu_buf[end]) + break; + end++; + + if (!trace_seq_putc(s, '(')) + return 0; + + for (i = 0; i < pdu_len; i++) { + + ret = trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); + if (!ret) + return ret; + + /* + * stop when the rest is just zeroes and indicate so + * with a ".." appended + */ + if (i == end && end != pdu_len - 1) + return trace_seq_puts(s, " ..) "); + } + + return trace_seq_puts(s, ") "); +} + static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%s]\n", - t_sector(ent), t_sec(ent), cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = trace_seq_printf(s, "%u ", t_bytes(ent)); + if (!ret) + return 0; + ret = blk_log_dump_pdu(s, ent); + if (!ret) + return 0; + return trace_seq_printf(s, "[%s]\n", cmd); + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + t_sector(ent), t_sec(ent), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); + } } static int blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) { - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), - t_sec(ent), t_error(ent)); - return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = blk_log_dump_pdu(s, ent); + if (ret) + return trace_seq_printf(s, "[%d]\n", t_error(ent)); + return 0; + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%d]\n", + t_sector(ent), + t_sec(ent), t_error(ent)); + return trace_seq_printf(s, "%llu [%d]\n", + t_sector(ent), t_error(ent)); + } } static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) -- cgit v1.2.3-70-g09d2 From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001 From: Shawn Du Date: Tue, 14 Apr 2009 13:58:56 +0800 Subject: blktrace: support per-partition tracing Though one can specify '-d /dev/sda1' when using blktrace, it still traces the whole sda. To support per-partition tracing, when we start tracing, we initialize bt->start_lba and bt->end_lba to the start and end sector of that partition. Note some actions are per device, thus we don't filter 0-sector events. The original patch and discussion can be found here: http://marc.info/?l=linux-btrace&m=122949374214540&w=2 Signed-off-by: Shawn Du Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Jens Axboe LKML-Reference: <49E42620.4050701@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/compat_ioctl.c | 2 +- drivers/scsi/sg.c | 1 + include/linux/blktrace_api.h | 24 +++++++++++++----------- kernel/trace/blktrace.c | 29 +++++++++++++++++++++-------- 4 files changed, 36 insertions(+), 20 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46..f8c218cd08e 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 82312df9b0b..49c98730bb8 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), + NULL, (char *)arg); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889e92e..267edc4017e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); @@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) - +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* __KERNEL__ */ #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2b98195b338..e932654cf59 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; - if (sector < bt->start_lba || sector > bt->end_lba) + if (sector && (sector < bt->start_lba || sector > bt->end_lba)) return 1; if (bt->pid && pid != bt->pid) return 1; @@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; - if (unlikely(act_log_check(bt, what, sector, pid))) + if (act_log_check(bt, what, sector, pid)) return; cpu = raw_smp_processor_id(); @@ -407,11 +407,13 @@ static struct rchan_callbacks blk_relay_callbacks = { * Setup everything required to start tracing */ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct blk_user_trace_setup *buts) + struct block_device *bdev, + struct blk_user_trace_setup *buts) { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; int ret, i; + struct hd_struct *part = NULL; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -480,11 +482,21 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - bt->start_lba = buts->start_lba; - bt->end_lba = buts->end_lba; - if (!bt->end_lba) + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else bt->end_lba = -1ULL; + /* overwrite with user settings */ + if (buts->start_lba) + bt->start_lba = buts->start_lba; + if (buts->end_lba) + bt->end_lba = buts->end_lba; + bt->pid = buts->pid; bt->trace_state = Blktrace_setup; @@ -505,6 +517,7 @@ err: } int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; @@ -514,7 +527,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return -EFAULT; - ret = do_blk_trace_setup(q, name, dev, &buts); + ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; @@ -582,7 +595,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, arg); + ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; case BLKTRACESTART: start = 1; -- cgit v1.2.3-70-g09d2 From 9908c30997b8a73c95f836170b9998dae9aa3f4a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 13:59:34 +0800 Subject: blktrace: support per-partition tracing for ftrace plugin The previous patch adds support to trace a single partition for relay+ioctl blktrace, and this patch is for ftrace plugin blktrace: # echo 1 > /sys/block/sda/sda7/enable # cat start_lba 102398373 # cat end_lba 102703545 Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42646.4060608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e932654cf59..d1098988052 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -403,6 +403,23 @@ static struct rchan_callbacks blk_relay_callbacks = { .remove_buf_file = blk_remove_buf_file_callback, }; +static void blk_trace_setup_lba(struct blk_trace *bt, + struct block_device *bdev) +{ + struct hd_struct *part = NULL; + + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else { + bt->start_lba = 0; + bt->end_lba = -1ULL; + } +} + /* * Setup everything required to start tracing */ @@ -413,7 +430,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; int ret, i; - struct hd_struct *part = NULL; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -482,14 +498,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; - } else - bt->end_lba = -1ULL; + blk_trace_setup_lba(bt, bdev); /* overwrite with user settings */ if (buts->start_lba) @@ -1370,7 +1379,8 @@ static int blk_trace_remove_queue(struct request_queue *q) /* * Setup everything required to start tracing */ -static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) +static int blk_trace_setup_queue(struct request_queue *q, + struct block_device *bdev) { struct blk_trace *old_bt, *bt = NULL; int ret = -ENOMEM; @@ -1383,9 +1393,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) if (!bt->msg_data) goto free_bt; - bt->dev = dev; + bt->dev = bdev->bd_dev; bt->act_mask = (u16)-1; - bt->end_lba = -1ULL; + + blk_trace_setup_lba(bt, bdev); old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { @@ -1602,7 +1613,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (attr == &dev_attr_enable) { if (value) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); else ret = blk_trace_remove_queue(q); goto out_unlock_bdev; @@ -1610,7 +1621,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ret = 0; if (q->blk_trace == NULL) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); if (ret == 0) { if (attr == &dev_attr_act_mask) -- cgit v1.2.3-70-g09d2 From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 14:00:05 +0800 Subject: blktrace: add trace/ to /sys/block/sda Impact: allow ftrace-plugin blktrace to trace device-mapper devices To trace a single partition: # echo 1 > /sys/block/sda/sda1/enable To trace the whole sda instead: # echo 1 > /sys/block/sda/enable Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace can't be used to trace device-mapper devices. Now: # echo 1 > /sys/block/dm-0/trace/enable echo: write error: No such device or address # mount -t ext4 /dev/dm-0 /mnt # echo 1 > /sys/block/dm-0/trace/enable # echo blk > /debug/tracing/current_tracer Reported-by: Theodore Tso Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42665.6020506@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/blk-sysfs.c | 7 ++++++- include/linux/blktrace_api.h | 6 ++++++ kernel/trace/blktrace.c | 5 +++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36beff5c..8653d710b39 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 267edc4017e..62763c95285 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d1098988052..8e7c5da3a3e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1644,3 +1644,8 @@ out: return ret ? ret : count; } +int blk_trace_init_sysfs(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); +} + -- cgit v1.2.3-70-g09d2 From f3948f8857ef5de239f28a61dddb1554a0ae4c2c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 15 Apr 2009 11:02:56 +0800 Subject: blktrace: fix context-info when mixed-using blk tracer and trace events When current tracer is set to blk tracer, TRACE_ITER_CONTEXT_INFO is unset, but actually context-info is printed: pdflush-431 [000] 821.181576: 8,0 P N [pdflush] And then if we enable TRACE_ITER_CONTEXT_INFO: # echo context-info > trace_options We'll see context-info printed twice. What's worse, when we use blk tracer and trace events at the same time, we'll see no context-info for trace events at all: jbd2_commit_logging: dev dm-0:8 transaction 333227 jbd2_end_commit: dev dm-0:8 transaction 333227 head 332814 rm-25433 [001] 9578.307485: 8,18 m N cfq25433 slice expired t=0 rm-25433 [001] 9578.307486: 8,18 m N cfq25433 put_queue This patch adds blk_tracer->set_flags(), and context-info flag is unset only when we set the output to classic mode. Note after this patch, one should unset context-info explicitly if he wants to get binary output that can be parsed by blkparse: # echo nocontext-info > trace_options # echo bin > trace_options # echo blk > current_tracer # cat trace_pipe | blkparse -i - Reported-by: Theodore Ts'o Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E54E60.50408@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8e7c5da3a3e..c32062bd10b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1211,7 +1211,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { blk_tracer_enabled = true; - trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } static int blk_tracer_init(struct trace_array *tr) @@ -1224,7 +1223,6 @@ static int blk_tracer_init(struct trace_array *tr) static void blk_tracer_stop(struct trace_array *tr) { blk_tracer_enabled = false; - trace_flags |= TRACE_ITER_CONTEXT_INFO; } static void blk_tracer_reset(struct trace_array *tr) @@ -1289,9 +1287,6 @@ out: static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, int flags) { - if (!trace_print_context(iter)) - return TRACE_TYPE_PARTIAL_LINE; - return print_one_line(iter, false); } @@ -1326,6 +1321,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) return print_one_line(iter, true); } +static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +{ + /* don't output context-info for blk_classic output */ + if (bit == TRACE_BLK_OPT_CLASSIC) { + if (set) + trace_flags &= ~TRACE_ITER_CONTEXT_INFO; + else + trace_flags |= TRACE_ITER_CONTEXT_INFO; + } + return 0; +} + static struct tracer blk_tracer __read_mostly = { .name = "blk", .init = blk_tracer_init, @@ -1335,6 +1342,7 @@ static struct tracer blk_tracer __read_mostly = { .print_header = blk_tracer_print_header, .print_line = blk_tracer_print_line, .flags = &blk_tracer_flags, + .set_flag = blk_tracer_set_flag, }; static struct trace_event trace_blk_event = { -- cgit v1.2.3-70-g09d2 From a42aaa3bbce85ac487ad4fad5db99e8e91b7aac1 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:27:26 -0400 Subject: blktrace: correct remap names This attempts to clarify names utilized during block I/O remap operations (partition, volume manager). It correctly matches up the /from/ information for both device & sector. This takes in the concept from Kosaki Motohiro and extends it to include better naming for the "device_from" field. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF4FAE.3000301@hp.com> Signed-off-by: Ingo Molnar --- include/linux/blktrace_api.h | 4 ++-- include/trace/block.h | 4 ++-- kernel/trace/blktrace.c | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 62763c95285..82b4636030e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,9 +116,9 @@ struct blk_io_trace { * The remap event */ struct blk_io_trace_remap { - __be32 device; __be32 device_from; - __be64 sector; + __be32 device_to; + __be64 sector_from; }; enum { diff --git a/include/trace/block.h b/include/trace/block.h index 25b7068b819..87f6456fd32 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); + sector_t to, sector_t from), + TP_ARGS(q, bio, dev, to, from)); #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c32062bd10b..f8d46d6f5d3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -830,8 +830,8 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * @q: queue the io is for * @bio: the source bio * @dev: target device - * @from: source sector * @to: target sector + * @from: source sector * * Description: * Device mapper or raid target sometimes need to split a bio because @@ -839,7 +839,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * **/ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) + dev_t dev, sector_t to, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -847,9 +847,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, if (likely(!bt)) return; - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); @@ -1028,11 +1028,11 @@ static void get_pdu_remap(const struct trace_entry *ent, struct blk_io_trace_remap *r) { const struct blk_io_trace_remap *__r = pdu_start(ent); - __u64 sector = __r->sector; + __u64 sector_from = __r->sector_from; - r->device = be32_to_cpu(__r->device); r->device_from = be32_to_cpu(__r->device_from); - r->sector = be64_to_cpu(sector); + r->device_to = be32_to_cpu(__r->device_to); + r->sector_from = be64_to_cpu(sector_from); } typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); @@ -1148,13 +1148,13 @@ static int blk_log_with_error(struct trace_seq *s, static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) { - struct blk_io_trace_remap r = { .device = 0, }; + struct blk_io_trace_remap r = { .device_from = 0, }; get_pdu_remap(ent, &r); return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", - t_sector(ent), - t_sec(ent), MAJOR(r.device), MINOR(r.device), - (unsigned long long)r.sector); + t_sector(ent), t_sec(ent), + MAJOR(r.device_from), MINOR(r.device_from), + (unsigned long long)r.sector_from); } static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) -- cgit v1.2.3-70-g09d2 From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:35:08 -0400 Subject: blktrace: from-sector redundant in trace_block_remap Remove redundant from-sector parameter: it's /always/ the bio's sector passed in. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF517C.7000503@hp.com> Signed-off-by: Ingo Molnar --- block/blk-core.c | 5 ++--- drivers/md/dm.c | 3 +-- include/trace/block.h | 4 ++-- kernel/trace/blktrace.c | 8 ++++---- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1..a5f747a8312 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1275,7 +1275,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_bdev = bdev->bd_contains; trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, bio->bi_sector, + bdev->bd_dev, bio->bi_sector - p->start_sect); } } @@ -1444,8 +1444,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, - old_sector); + trace_block_remap(q, bio, old_dev, old_sector); trace_block_bio_queue(q, bio); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035b..b01514afb6b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -657,8 +657,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, - clone->bi_sector, sector); + tio->io->bio->bi_bdev->bd_dev, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/include/trace/block.h b/include/trace/block.h index 87f6456fd32..8ac945b7746 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -70,7 +70,7 @@ DECLARE_TRACE(block_split, DECLARE_TRACE(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t to, sector_t from), - TP_ARGS(q, bio, dev, to, from)); + sector_t to), + TP_ARGS(q, bio, dev, to)); #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index f8d46d6f5d3..e099f8cc1d1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -830,7 +830,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * @q: queue the io is for * @bio: the source bio * @dev: target device - * @to: target sector * @from: source sector * * Description: @@ -839,7 +838,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio, * **/ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t to, sector_t from) + dev_t dev, sector_t from) { struct blk_trace *bt = q->blk_trace; struct blk_io_trace_remap r; @@ -851,8 +850,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector_from = cpu_to_be64(from); - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), + sizeof(r), &r); } /** -- cgit v1.2.3-70-g09d2 From 5b93629b4509c03ffa87a9316412fedf6f58cb37 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:38 +0900 Subject: block: implement blk_rq_pos/[cur_]sectors() and convert obvious ones Implement accessors - blk_rq_pos(), blk_rq_sectors() and blk_rq_cur_sectors() which return rq->hard_sector, rq->hard_nr_sectors and rq->hard_cur_sectors respectively and convert direct references of the said fields to the accessors. This is in preparation of request data length handling cleanup. Geert : suggested adding const to struct request * parameter to accessors Sergei : spotted error in patch description [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Geert Uytterhoeven Acked-by: Stephen Rothwell Tested-by: Grant Likely Acked-by: Grant Likely Ackec-by: Sergei Shtylyov Cc: Bartlomiej Zolnierkiewicz Cc: Borislav Petkov Cc: James Bottomley Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 2 +- block/cfq-iosched.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/viodasd.c | 6 +++--- drivers/block/xsysace.c | 10 +++++----- drivers/ide/ide-cd.c | 8 ++++---- drivers/ide/ide-io.c | 4 ++-- drivers/message/i2o/i2o_block.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 23 ++++++++++++++++++++--- kernel/trace/blktrace.c | 4 ++-- 12 files changed, 42 insertions(+), 25 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c8d087655ef..c167de5b9ef 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -163,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!rq->hard_nr_sectors) { + if (!blk_rq_sectors(rq)) { q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); /* diff --git a/block/blk-core.c b/block/blk-core.c index 394c5bd8127..895e55b74a4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1683,7 +1683,7 @@ static void blk_account_io_done(struct request *req) unsigned int blk_rq_bytes(struct request *rq) { if (blk_fs_request(rq)) - return rq->hard_nr_sectors << 9; + return blk_rq_sectors(rq) << 9; return rq->data_len; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index def0c698a4b..575083a9ffe 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -760,7 +760,7 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", cfqd->rq_in_driver); - cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; + cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); } static void cfq_deactivate_request(struct request_queue *q, struct request *rq) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index f6586e4d351..c2388673684 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -136,7 +136,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n", __func__, __LINE__, op, n, req->nr_sectors, - req->hard_nr_sectors); + blk_rq_sectors(req)); #endif start_sector = req->sector * priv->blocking_factor; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index ecccf65dce2..e821eed7132 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -368,12 +368,12 @@ static void do_viodasd_request(struct request_queue *q) blkdev_dequeue_request(req); /* check that request contains a valid command */ if (!blk_fs_request(req)) { - viodasd_end_request(req, -EIO, req->hard_nr_sectors); + viodasd_end_request(req, -EIO, blk_rq_sectors(req)); continue; } /* Try sending the request */ if (send_request(req) != 0) - viodasd_end_request(req, -EIO, req->hard_nr_sectors); + viodasd_end_request(req, -EIO, blk_rq_sectors(req)); } } @@ -590,7 +590,7 @@ static int viodasd_handle_read_write(struct vioblocklpevent *bevent) err = vio_lookup_rc(viodasd_err_table, bevent->sub_result); printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n", event->xRc, bevent->sub_result, err->msg); - num_sect = req->hard_nr_sectors; + num_sect = blk_rq_sectors(req); } qlock = req->q->queue_lock; spin_lock_irqsave(qlock, irq_flags); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index b1e1d7e5ab1..5722931d14c 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -645,8 +645,8 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, - "request: sec=%llx hcnt=%lx, ccnt=%x, dir=%i\n", - (unsigned long long) req->sector, req->hard_nr_sectors, + "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n", + (unsigned long long) req->sector, blk_rq_sectors(req), req->current_nr_sectors, rq_data_dir(req)); ace->req = req; @@ -654,7 +654,7 @@ static void ace_fsm_dostate(struct ace_device *ace) ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR; ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF); - count = req->hard_nr_sectors; + count = blk_rq_sectors(req); if (rq_data_dir(req)) { /* Kick off write request */ dev_dbg(ace->dev, "write data\n"); @@ -719,8 +719,8 @@ static void ace_fsm_dostate(struct ace_device *ace) /* bio finished; is there another one? */ if (__blk_end_request(ace->req, 0, blk_rq_cur_bytes(ace->req))) { - /* dev_dbg(ace->dev, "next block; h=%li c=%i\n", - * ace->req->hard_nr_sectors, + /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", + * blk_rq_sectors(ace->req), * ace->req->current_nr_sectors); */ ace->data_ptr = ace->req->buffer; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 8bbe222c5e4..182320dd6ea 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -730,7 +730,7 @@ out_end: if (blk_pc_request(rq)) nsectors = (rq->data_len + 511) >> 9; else - nsectors = rq->hard_nr_sectors; + nsectors = blk_rq_sectors(rq); if (nsectors == 0) nsectors = 1; @@ -875,7 +875,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return ide_issue_pc(drive, &cmd); out_end: - nsectors = rq->hard_nr_sectors; + nsectors = blk_rq_sectors(rq); if (nsectors == 0) nsectors = 1; @@ -1359,8 +1359,8 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { int hard_sect = queue_hardsect_size(q); - long block = (long)rq->hard_sector / (hard_sect >> 9); - unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9); + long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); + unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); memset(rq->cmd, 0, BLK_MAX_CDB); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a0309ea661a..df23bcbd94b 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -118,7 +118,7 @@ unsigned int ide_rq_bytes(struct request *rq) if (blk_pc_request(rq)) return rq->data_len; else - return rq->hard_cur_sectors << 9; + return blk_rq_cur_sectors(rq) << 9; } EXPORT_SYMBOL_GPL(ide_rq_bytes); @@ -133,7 +133,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) * and complete the whole request right now */ if (blk_noretry_request(rq) && error <= 0) - nr_bytes = rq->hard_nr_sectors << 9; + nr_bytes = blk_rq_sectors(rq) << 9; rc = ide_end_rq(drive, rq, error, nr_bytes); if (rc == 0) diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 221317e6a00..56e60f0d531 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -427,7 +427,7 @@ static void i2o_block_end_request(struct request *req, int error, unsigned long flags; if (blk_end_request(req, error, nr_bytes)) { - int leftover = (req->hard_nr_sectors << KERNEL_SECTOR_SHIFT); + int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT); if (blk_pc_request(req)) leftover = req->data_len; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7d49ef589f3..9ff0ca9988a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -546,7 +546,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, * to queue the remainder of them. */ if (blk_end_request(req, error, bytes)) { - int leftover = (req->hard_nr_sectors << 9); + int leftover = blk_rq_sectors(req) << 9; if (blk_pc_request(req)) leftover = req->resid_len; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a967cad89f..4e5f8559872 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -832,13 +832,30 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); /* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->hard_sector; +} + extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return rq->hard_nr_sectors; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return rq->hard_cur_sectors; +} + /* * Request completion related functions. * diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 921ef5d1f0b..42f1c11e754 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -646,7 +646,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, rw, what, rq->errors, 0, NULL); } } @@ -857,7 +857,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, rq->errors, len, data); else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, 0, BLK_TA_DRV_DATA, rq->errors, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3-70-g09d2 From 2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2009 22:24:41 +0900 Subject: block: drop request->hard_* and *nr_sectors struct request has had a few different ways to represent some properties of a request. ->hard_* represent block layer's view of the request progress (completion cursor) and the ones without the prefix are supposed to represent the issue cursor and allowed to be updated as necessary by the low level drivers. The thing is that as block layer supports partial completion, the two cursors really aren't necessary and only cause confusion. In addition, manual management of request detail from low level drivers is cumbersome and error-prone at the very least. Another interesting duplicate fields are rq->[hard_]nr_sectors and rq->{hard_cur|current}_nr_sectors against rq->data_len and rq->bio->bi_size. This is more convoluted than the hard_ case. rq->[hard_]nr_sectors are initialized for requests with bio but blk_rq_bytes() uses it only for !pc requests. rq->data_len is initialized for all request but blk_rq_bytes() uses it only for pc requests. This causes good amount of confusion throughout block layer and its drivers and determining the request length has been a bit of black magic which may or may not work depending on circumstances and what the specific LLD is actually doing. rq->{hard_cur|current}_nr_sectors represent the number of sectors in the contiguous data area at the front. This is mainly used by drivers which transfers data by walking request segment-by-segment. This value always equals rq->bio->bi_size >> 9. However, data length for pc requests may not be multiple of 512 bytes and using this field becomes a bit confusing. In general, having multiple fields to represent the same property leads only to confusion and subtle bugs. With recent block low level driver cleanups, no driver is accessing or manipulating these duplicate fields directly. Drop all the duplicates. Now rq->sector means the current sector, rq->data_len the current total length and rq->bio->bi_size the current segment length. Everything else is defined in terms of these three and available only through accessors. * blk_recalc_rq_sectors() is collapsed into blk_update_request() and now handles pc and fs requests equally other than rq->sector update. This means that now pc requests can use partial completion too (no in-kernel user yet tho). * bio_cur_sectors() is replaced with bio_cur_bytes() as block layer now uses byte count as the primary data length. * blk_rq_pos() is now guranteed to be always correct. In-block users converted. * blk_rq_bytes() is now guaranteed to be always valid as is blk_rq_sectors(). In-block users converted. * blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9. More convenient one is used. * blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const pointer to request. [ Impact: API cleanup, single way to represent one property of a request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 81 ++++++++++++++++++------------------------------ block/blk-merge.c | 36 +++------------------ block/blk.h | 1 - block/cfq-iosched.c | 10 +++--- include/linux/bio.h | 6 ++-- include/linux/blkdev.h | 37 ++++++++++------------ include/linux/elevator.h | 2 +- kernel/trace/blktrace.c | 16 +++++----- 8 files changed, 67 insertions(+), 122 deletions(-) (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-core.c b/block/blk-core.c index 82dc20621c0..3596ca71909 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; - rq->sector = rq->hard_sector = (sector_t) -1; + rq->sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; @@ -189,8 +189,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", - rq->bio, rq->biotail, - rq->buffer, rq->data_len); + rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); @@ -1096,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; - req->hard_sector = req->sector = bio->bi_sector; + req->sector = bio->bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1113,14 +1112,13 @@ static inline bool queue_should_plug(struct request_queue *q) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors; + int el_ret; + unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); const int unplug = bio_unplug(bio); int rw_flags; - nr_sectors = bio_sectors(bio); - /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1145,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->biotail->bi_next = bio; req->biotail = bio; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1171,10 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->current_nr_sectors = bio_cur_sectors(bio); - req->hard_cur_sectors = req->current_nr_sectors; - req->sector = req->hard_sector = bio->bi_sector; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->sector = bio->bi_sector; + req->data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1557,7 +1553,7 @@ EXPORT_SYMBOL(submit_bio); int blk_rq_check_limits(struct request_queue *q, struct request *rq) { if (blk_rq_sectors(rq) > q->max_sectors || - rq->data_len > q->max_hw_sectors << 9) { + blk_rq_bytes(rq) > q->max_hw_sectors << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -1675,35 +1671,6 @@ static void blk_account_io_done(struct request *req) } } -/** - * blk_rq_bytes - Returns bytes left to complete in the entire request - * @rq: the request being processed - **/ -unsigned int blk_rq_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return blk_rq_sectors(rq) << 9; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_bytes); - -/** - * blk_rq_cur_bytes - Returns bytes left to complete in the current segment - * @rq: the request being processed - **/ -unsigned int blk_rq_cur_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->current_nr_sectors << 9; - - if (rq->bio) - return rq->bio->bi_size; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); - struct request *elv_next_request(struct request_queue *q) { struct request *rq; @@ -1736,7 +1703,7 @@ struct request *elv_next_request(struct request_queue *q) if (rq->cmd_flags & REQ_DONTPREP) break; - if (q->dma_drain_size && rq->data_len) { + if (q->dma_drain_size && blk_rq_bytes(rq)) { /* * make sure space for the drain appears we * know we can do this because max_hw_segments @@ -1759,7 +1726,7 @@ struct request *elv_next_request(struct request_queue *q) * avoid resource deadlock. REQ_STARTED will * prevent other fs requests from passing this one. */ - if (q->dma_drain_size && rq->data_len && + if (q->dma_drain_size && blk_rq_bytes(rq) && !(rq->cmd_flags & REQ_DONTPREP)) { /* * remove the space for the drain we added @@ -1911,8 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * can find how many bytes remain in the request * later. */ - req->nr_sectors = req->hard_nr_sectors = 0; - req->current_nr_sectors = req->hard_cur_sectors = 0; + req->data_len = 0; return false; } @@ -1926,8 +1892,25 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) bio_iovec(bio)->bv_len -= nr_bytes; } - blk_recalc_rq_sectors(req, total_bytes >> 9); + req->data_len -= total_bytes; + req->buffer = bio_data(req->bio); + + /* update sector only for requests with clear definition of sector */ + if (blk_fs_request(req) || blk_discard_rq(req)) + req->sector += total_bytes >> 9; + + /* + * If total number of sectors is less than the first segment + * size, something has gone terribly wrong. + */ + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { + printk(KERN_ERR "blk: request botched\n"); + req->data_len = blk_rq_cur_bytes(req); + } + + /* recalculate the number of segments */ blk_recalc_rq_segments(req); + return true; } EXPORT_SYMBOL_GPL(blk_update_request); @@ -2049,11 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->nr_phys_segments = bio_phys_segments(q, bio); rq->buffer = bio_data(bio); } - rq->current_nr_sectors = bio_cur_sectors(bio); - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); rq->data_len = bio->bi_size; - rq->bio = rq->biotail = bio; if (bio->bi_bdev) diff --git a/block/blk-merge.c b/block/blk-merge.c index bf62a87a9da..b8df66aef0f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -9,35 +9,6 @@ #include "blk.h" -void blk_recalc_rq_sectors(struct request *rq, int nsect) -{ - if (blk_fs_request(rq) || blk_discard_rq(rq)) { - rq->hard_sector += nsect; - rq->hard_nr_sectors -= nsect; - - /* - * Move the I/O submission pointers ahead if required. - */ - if ((rq->nr_sectors >= rq->hard_nr_sectors) && - (rq->sector <= rq->hard_sector)) { - rq->sector = rq->hard_sector; - rq->nr_sectors = rq->hard_nr_sectors; - rq->hard_cur_sectors = bio_cur_sectors(rq->bio); - rq->current_nr_sectors = rq->hard_cur_sectors; - rq->buffer = bio_data(rq->bio); - } - - /* - * if total number of sectors is less than the first segment - * size, something has gone terribly wrong - */ - if (rq->nr_sectors < rq->current_nr_sectors) { - printk(KERN_ERR "blk: request botched\n"); - rq->nr_sectors = rq->current_nr_sectors; - } - } -} - static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { @@ -199,8 +170,9 @@ new_segment: if (unlikely(rq->cmd_flags & REQ_COPY_USER) && - (rq->data_len & q->dma_pad_mask)) { - unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1; + (blk_rq_bytes(rq) & q->dma_pad_mask)) { + unsigned int pad_len = + (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; sg->length += pad_len; rq->extra_len += pad_len; @@ -398,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = next->bio; req->biotail = next->biotail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + req->data_len += blk_rq_bytes(next); elv_merge_requests(q, req, next); diff --git a/block/blk.h b/block/blk.h index 51115599df9..ab54529103c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -101,7 +101,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, int attempt_back_merge(struct request_queue *q, struct request *rq); int attempt_front_merge(struct request_queue *q, struct request *rq); void blk_recalc_rq_segments(struct request *rq); -void blk_recalc_rq_sectors(struct request *rq, int nsect); void blk_queue_congestion_threshold(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index db4d990a66b..99ac4304d71 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -579,9 +579,9 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, * Sort strictly based on sector. Smallest to the left, * largest to the right. */ - if (sector > cfqq->next_rq->sector) + if (sector > blk_rq_pos(cfqq->next_rq)) n = &(*p)->rb_right; - else if (sector < cfqq->next_rq->sector) + else if (sector < blk_rq_pos(cfqq->next_rq)) n = &(*p)->rb_left; else break; @@ -611,8 +611,8 @@ static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) return; cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector, - &parent, &p); + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, + blk_rq_pos(cfqq->next_rq), &parent, &p); if (!__cfqq) { rb_link_node(&cfqq->p_node, parent, p); rb_insert_color(&cfqq->p_node, cfqq->p_root); @@ -996,7 +996,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, if (cfq_rq_close(cfqd, __cfqq->next_rq)) return __cfqq; - if (__cfqq->next_rq->sector < sector) + if (blk_rq_pos(__cfqq->next_rq) < sector) node = rb_next(&__cfqq->p_node); else node = rb_prev(&__cfqq->p_node); diff --git a/include/linux/bio.h b/include/linux/bio.h index f37ca8c726b..d30ec6f30dd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -218,12 +218,12 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) -static inline unsigned int bio_cur_sectors(struct bio *bio) +static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) - return bio_iovec(bio)->bv_len >> 9; + return bio_iovec(bio)->bv_len; else /* dataless requests such as discard */ - return bio->bi_size >> 9; + return bio->bi_size; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4e5f8559872..ce2bf5efa9b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,19 +166,8 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - /* Maintain bio traversal state for part by part I/O submission. - * hard_* are block layer internals, no driver should touch them! - */ - - sector_t sector; /* next sector to submit */ - sector_t hard_sector; /* next sector to complete */ - unsigned long nr_sectors; /* no. of sectors left to submit */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ - /* no. of sectors left to submit in the current segment */ - unsigned int current_nr_sectors; - - /* no. of sectors left to complete in the current segment */ - unsigned int hard_cur_sectors; + sector_t sector; /* sector cursor */ + unsigned int data_len; /* total data len, don't access directly */ struct bio *bio; struct bio *biotail; @@ -226,7 +215,6 @@ struct request { unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; - unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; unsigned int resid_len; /* residual count */ @@ -840,20 +828,27 @@ extern void blkdev_dequeue_request(struct request *req); */ static inline sector_t blk_rq_pos(const struct request *rq) { - return rq->hard_sector; + return rq->sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->data_len; } -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + return rq->bio ? bio_cur_bytes(rq->bio) : 0; +} static inline unsigned int blk_rq_sectors(const struct request *rq) { - return rq->hard_nr_sectors; + return blk_rq_bytes(rq) >> 9; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { - return rq->hard_cur_sectors; + return blk_rq_cur_bytes(rq) >> 9; } /* @@ -928,7 +923,7 @@ static inline void blk_end_request_all(struct request *rq, int error) */ static inline bool blk_end_request_cur(struct request *rq, int error) { - return blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } /** @@ -981,7 +976,7 @@ static inline void __blk_end_request_all(struct request *rq, int error) */ static inline bool __blk_end_request_cur(struct request *rq, int error) { - return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } extern void blk_complete_request(struct request *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c59b769f62b..4e462878c9c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -171,7 +171,7 @@ enum { ELV_MQUEUE_MUST, }; -#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 42f1c11e754..5708a14bee5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -642,12 +642,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - rq->cmd_len, rq->cmd); + __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, + what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, - rw, what, rq->errors, 0, NULL); + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, + what, rq->errors, 0, NULL); } } @@ -854,11 +854,11 @@ void blk_add_driver_data(struct request_queue *q, return; if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); + __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, + BLK_TA_DRV_DATA, rq->errors, len, data); else - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, + BLK_TA_DRV_DATA, rq->errors, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3-70-g09d2 From 049862579333cc6cd9e6edfd6987cd0addfd8c59 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 11 May 2009 14:33:23 +0800 Subject: blktrace: pdu_buf of pc events should be unsigned I got this: 8,0 1 305.417782332 2037 I R 32 (ffffff9e 10 00 ...) [bash] It should be: 8,0 1 305.417782332 2037 I R 32 (9e 10 00 ...) [bash] [ Impact: fix output of pc events ] Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A07C6B3.9080802@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e099f8cc1d1..05b4747fd87 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1065,7 +1065,7 @@ static int blk_log_action(struct trace_iterator *iter, const char *act) static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) { - const char *pdu_buf; + const unsigned char *pdu_buf; int pdu_len; int i, end, ret; -- cgit v1.2.3-70-g09d2 From fd51d251e4cdb21f68e9dbc4336514d64a105a79 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Tue, 19 May 2009 09:59:08 +0200 Subject: blktrace: remove debugfs entries on bad path debugfs directory entries for devices are not removed on some of the failure pathes in do_blk_trace_setup(). One way to reproduce is to start blktrace on multiple devices with insufficient Vmalloc space: Devices will fail with a message like this: BLKTRACESETUP(2) /dev/sdu failed: 5/Input/output error If so, the respective entries in debugfs (e.g. /sys/kernel/debug/block/sdu) will remain and subsequent attempts to start blktrace on the respective devices will not succeed due to existing directories. [ Impact: fix /debug/tracing file cleanup corner case ] Signed-off-by: Stefan Raspl Acked-by: Li Zefan Cc: Li Zefan Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com LKML-Reference: <4A1266CC.5040801@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/blktrace.c') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 05b4747fd87..e3abf55bc8e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -262,6 +262,7 @@ static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); + debugfs_remove(bt->dir); relay_close(bt->rchan); free_percpu(bt->sequence); free_percpu(bt->msg_data); -- cgit v1.2.3-70-g09d2 From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Jun 2009 13:43:05 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT() TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions ... Cons: - no dev_t info for the output of plug, unplug_timer and unplug_io events. no dev_t info for getrq and sleeprq events if bio == NULL. no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL. This is mainly because we can't get the deivce from a request queue. But this may change in the future. - A packet command is converted to a string in TP_assign, not TP_print. While blktrace do the convertion just before output. Since pc requests should be rather rare, this is not a big issue. - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT has a unique format, which means we have some unused data in a trace entry. The overhead is minimized by using __dynamic_array() instead of __array(). I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing: dd dd + ioctl blktrace dd + TRACE_EVENT (splice) 1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s 2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s 3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s So the overhead of tracing is very small, and no regression when using those trace events vs blktrace. And the binary output of TRACE_EVENT is much smaller than blktrace: # ls -l -h -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out Following are some comparisons between TRACE_EVENT and blktrace: plug: kjournald-480 [000] 303.084981: block_plug: [kjournald] kjournald-480 [000] 303.084981: 8,0 P N [kjournald] unplug_io: kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1 kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1 remap: kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384 kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384 bio_backmerge: kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald] kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald] getrq: kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald] kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald] bash-2066 [001] 1072.953770: 8,0 G N [bash] bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash] rq_complete: konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0] konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0] ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0] ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0] rq_insert: kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald] kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald] Changelog from v2 -> v3: - use the newly introduced __dynamic_array(). Changelog from v1 -> v2: - use __string() instead of __array() to minimize the memory required to store hex dump of rq->cmd(). - support large pc requests. - add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT. - some cleanups. Signed-off-by: Li Zefan LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- block/blk-core.c | 16 +- block/elevator.c | 8 +- drivers/md/dm.c | 5 +- fs/bio.c | 3 +- include/linux/blktrace_api.h | 13 ++ include/trace/block.h | 76 ------- include/trace/events/block.h | 483 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/Makefile | 5 +- kernel/trace/blktrace.c | 78 ++++++- mm/bounce.c | 5 +- 10 files changed, 588 insertions(+), 104 deletions(-) delete mode 100644 include/trace/block.h create mode 100644 include/trace/events/block.h (limited to 'kernel/trace/blktrace.c') diff --git a/block/blk-core.c b/block/blk-core.c index 1306de9cce0..9475bf99b89 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,22 +28,14 @@ #include #include #include -#include + +#define CREATE_TRACE_POINTS +#include #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/block/elevator.c b/block/elevator.c index 7073a907257..e220f0c543e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,17 +33,16 @@ #include #include #include -#include #include #include +#include + #include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e2ee4a79ea2..3fd8b1e6548 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #define DM_MSG_PREFIX "core" @@ -53,8 +54,6 @@ struct dm_target_io { union map_info info; }; -DEFINE_TRACE(block_bio_complete); - /* * For request-based dm. * One of these is allocated per request. diff --git a/fs/bio.c b/fs/bio.c index 98711647ece..740699c4f90 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -26,10 +26,9 @@ #include #include #include -#include #include /* for struct sg_iovec */ -DEFINE_TRACE(block_split); +#include /* * Test patch to inline a certain number of bi_io_vec's inside the bio diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 82b4636030e..c7ec31dd04c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ +#ifdef CONFIG_EVENT_TRACING + +static inline int blk_cmd_buf_len(struct request *rq) +{ + return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; +} + +extern void blk_dump_cmd(char *buf, struct request *rq); +extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); + +#endif /* CONFIG_EVENT_TRACING */ + #endif /* __KERNEL__ */ #endif diff --git a/include/trace/block.h b/include/trace/block.h deleted file mode 100644 index 5b12efa096b..00000000000 --- a/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include -#include - -DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from), - TP_ARGS(q, bio, dev, from)); - -#endif diff --git a/include/trace/events/block.h b/include/trace/events/block.h new file mode 100644 index 00000000000..a99d1e565bb --- /dev/null +++ b/include/trace/events/block.h @@ -0,0 +1,483 @@ +#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BLOCK_H + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + +TRACE_EVENT(block_rq_abort, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_insert, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_issue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_requeue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector; + __entry->nr_sector = blk_pc_request(rq) ? + 0 : rq->hard_nr_sectors; + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + __entry->sector, __entry->nr_sector, __entry->errors) +); +TRACE_EVENT(block_bio_bounce, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_complete, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __field( int, error ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->error) +); + +TRACE_EVENT(block_bio_backmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_frontmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_queue, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_getrq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_sleeprq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_plug, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s]", __entry->comm) +); + +TRACE_EVENT(block_unplug_timer, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_unplug_io, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_split, + + TP_PROTO(struct request_queue *q, struct bio *bio, + unsigned int new_sector), + + TP_ARGS(q, bio, new_sector), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, new_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->new_sector = new_sector; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu / %llu [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->new_sector, __entry->comm) +); + +TRACE_EVENT(block_remap, + + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from), + + TP_ARGS(q, bio, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + __entry->sector, __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + __entry->old_sector) +); + +#endif /* _TRACE_BLOCK_H */ + +/* This part must be outside protection */ +#include + diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 06b85850fab..844164dca90 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o -obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +ifeq ($(CONFIG_BLOCK),y) +obj-$(CONFIG_EVENT_TRACING) += blktrace.o +endif obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e3abf55bc8e..7bd6a9893c2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -23,10 +23,14 @@ #include #include #include -#include #include + +#include + #include "trace_output.h" +#ifdef CONFIG_BLK_DEV_IO_TRACE + static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; @@ -1658,3 +1662,75 @@ int blk_trace_init_sysfs(struct device *dev) return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#ifdef CONFIG_EVENT_TRACING + +void blk_dump_cmd(char *buf, struct request *rq) +{ + int i, end; + int len = rq->cmd_len; + unsigned char *cmd = rq->cmd; + + if (!blk_pc_request(rq)) { + buf[0] = '\0'; + return; + } + + for (end = len - 1; end >= 0; end--) + if (cmd[end]) + break; + end++; + + for (i = 0; i < len; i++) { + buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); + if (i == end && end != len - 1) { + sprintf(buf, " .."); + break; + } + } +} + +void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) +{ + int i = 0; + + if (rw & WRITE) + rwbs[i++] = 'W'; + else if (rw & 1 << BIO_RW_DISCARD) + rwbs[i++] = 'D'; + else if (bytes) + rwbs[i++] = 'R'; + else + rwbs[i++] = 'N'; + + if (rw & 1 << BIO_RW_AHEAD) + rwbs[i++] = 'A'; + if (rw & 1 << BIO_RW_BARRIER) + rwbs[i++] = 'B'; + if (rw & 1 << BIO_RW_SYNCIO) + rwbs[i++] = 'S'; + if (rw & 1 << BIO_RW_META) + rwbs[i++] = 'M'; + + rwbs[i] = '\0'; +} + +void blk_fill_rwbs_rq(char *rwbs, struct request *rq) +{ + int rw = rq->cmd_flags & 0x03; + int bytes; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) + bytes = rq->data_len; + else + bytes = rq->hard_nr_sectors << 9; + + blk_fill_rwbs(rwbs, rw, bytes); +} + +#endif /* CONFIG_EVENT_TRACING */ + diff --git a/mm/bounce.c b/mm/bounce.c index e590272fe7a..65f5e17e411 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -14,16 +14,15 @@ #include #include #include -#include #include +#include + #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 static mempool_t *page_pool, *isa_page_pool; -DEFINE_TRACE(block_bio_bounce); - #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { -- cgit v1.2.3-70-g09d2