summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
committerIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
commitbcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch)
treee420679a5db6ea4e1694eef57f9abb6acac8d4d3 /fs/xfs/xfs_buf.c
parent26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff)
parent000078bc3ee69efb1124b8478c7527389a826074 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: * Fix include order for bison/flex-generated C files, from Ben Hutchings * Build fixes and documentation corrections from David Ahern * Group parsing support, from Jiri Olsa * UI/gtk refactorings and improvements from Namhyung Kim * NULL deref fix for perf script, from Namhyung Kim * Assorted cleanups from Robert Richter * Let O= makes handle relative paths, from Steven Rostedt * perf script python fixes, from Feng Tang. * Improve 'perf lock' error message when the needed tracepoints are not present, from David Ahern. * Initial bash completion support, from Frederic Weisbecker * Allow building without libelf, from Namhyung Kim. * Support DWARF CFI based unwind to have callchains when %bp based unwinding is not possible, from Jiri Olsa. * Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF section was the end goal, several fixes for code that handles all architectures and cleanups are included, from Cody Schafer. * Add a description for the JIT interface, from Andi Kleen. * Assorted fixes for Documentation and build in 32 bit, from Robert Richter * Add support for non-tracepoint events in perf script python, from Feng Tang * Cache the libtraceevent event_format associated to each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events. [ This is to reduce the surface contact with libtraceevents and make clear what is that the perf tools needs from that lib: so far parsing the common and per event fields. ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--fs/xfs/xfs_buf.c240
1 files changed, 176 insertions, 64 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 269b35c084d..d7a9dd735e1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,49 @@ xfs_buf_stale(
ASSERT(atomic_read(&bp->b_hold) >= 1);
}
+static int
+xfs_buf_get_maps(
+ struct xfs_buf *bp,
+ int map_count)
+{
+ ASSERT(bp->b_maps == NULL);
+ bp->b_map_count = map_count;
+
+ if (map_count == 1) {
+ bp->b_maps = &bp->b_map;
+ return 0;
+ }
+
+ bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+ KM_NOFS);
+ if (!bp->b_maps)
+ return ENOMEM;
+ return 0;
+}
+
+/*
+ * Frees b_pages if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+ struct xfs_buf *bp)
+{
+ if (bp->b_maps != &bp->b_map) {
+ kmem_free(bp->b_maps);
+ bp->b_maps = NULL;
+ }
+}
+
struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
+ int error;
+ int i;
bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
if (unlikely(!bp))
@@ -192,16 +227,28 @@ xfs_buf_alloc(
sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
+ bp->b_flags = flags;
/*
* Set length and io_length to the same value initially.
* I/O routines should use io_length, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
- bp->b_length = numblks;
- bp->b_io_length = numblks;
- bp->b_flags = flags;
- bp->b_bn = blkno;
+ error = xfs_buf_get_maps(bp, nmaps);
+ if (error) {
+ kmem_zone_free(xfs_buf_zone, bp);
+ return NULL;
+ }
+
+ bp->b_bn = map[0].bm_bn;
+ bp->b_length = 0;
+ for (i = 0; i < nmaps; i++) {
+ bp->b_maps[i].bm_bn = map[i].bm_bn;
+ bp->b_maps[i].bm_len = map[i].bm_len;
+ bp->b_length += map[i].bm_len;
+ }
+ bp->b_io_length = bp->b_length;
+
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
@@ -280,6 +327,7 @@ xfs_buf_free(
} else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
}
@@ -327,8 +375,9 @@ xfs_buf_allocate_memory(
}
use_alloc_page:
- start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
- end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
+ end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
page_count = end - start;
error = _xfs_buf_get_pages(bp, page_count, flags);
if (unlikely(error))
@@ -425,8 +474,8 @@ _xfs_buf_map_pages(
xfs_buf_t *
_xfs_buf_find(
struct xfs_buftarg *btp,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags,
xfs_buf_t *new_bp)
{
@@ -435,7 +484,12 @@ _xfs_buf_find(
struct rb_node **rbp;
struct rb_node *parent;
xfs_buf_t *bp;
+ xfs_daddr_t blkno = map[0].bm_bn;
+ int numblks = 0;
+ int i;
+ for (i = 0; i < nmaps; i++)
+ numblks += map[i].bm_len;
numbytes = BBTOB(numblks);
/* Check for IOs smaller than the sector size / not sector aligned */
@@ -527,31 +581,31 @@ found:
* more hits than misses.
*/
struct xfs_buf *
-xfs_buf_get(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
+xfs_buf_get_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
- bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+ bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
if (likely(bp))
goto found;
- new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+ new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
if (unlikely(!new_bp))
return NULL;
error = xfs_buf_allocate_memory(new_bp, flags);
if (error) {
- kmem_zone_free(xfs_buf_zone, new_bp);
+ xfs_buf_free(new_bp);
return NULL;
}
- bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+ bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
if (!bp) {
xfs_buf_free(new_bp);
return NULL;
@@ -560,8 +614,6 @@ xfs_buf_get(
if (bp != new_bp)
xfs_buf_free(new_bp);
- bp->b_io_length = bp->b_length;
-
found:
if (!bp->b_addr) {
error = _xfs_buf_map_pages(bp, flags);
@@ -584,7 +636,7 @@ _xfs_buf_read(
xfs_buf_flags_t flags)
{
ASSERT(!(flags & XBF_WRITE));
- ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+ ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -596,17 +648,17 @@ _xfs_buf_read(
}
xfs_buf_t *
-xfs_buf_read(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
+xfs_buf_read_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
flags |= XBF_READ;
- bp = xfs_buf_get(target, blkno, numblks, flags);
+ bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp) {
trace_xfs_buf_read(bp, flags, _RET_IP_);
@@ -634,15 +686,15 @@ xfs_buf_read(
* safe manner.
*/
void
-xfs_buf_readahead(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks)
+xfs_buf_readahead_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps)
{
if (bdi_read_congested(target->bt_bdi))
return;
- xfs_buf_read(target, blkno, numblks,
+ xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
}
@@ -665,8 +717,10 @@ xfs_buf_read_uncached(
return NULL;
/* set up the buffer for a read IO */
- XFS_BUF_SET_ADDR(bp, daddr);
- XFS_BUF_READ(bp);
+ ASSERT(bp->b_map_count == 1);
+ bp->b_bn = daddr;
+ bp->b_maps[0].bm_bn = daddr;
+ bp->b_flags |= XBF_READ;
xfsbdstrat(target->bt_mount, bp);
error = xfs_buf_iowait(bp);
@@ -694,7 +748,11 @@ xfs_buf_set_empty(
bp->b_addr = NULL;
bp->b_length = numblks;
bp->b_io_length = numblks;
+
+ ASSERT(bp->b_map_count == 1);
bp->b_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_len = bp->b_length;
}
static inline struct page *
@@ -758,9 +816,10 @@ xfs_buf_get_uncached(
{
unsigned long page_count;
int error, i;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
- bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+ bp = _xfs_buf_alloc(target, &map, 1, 0);
if (unlikely(bp == NULL))
goto fail;
@@ -791,6 +850,7 @@ xfs_buf_get_uncached(
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
fail:
return NULL;
@@ -1144,36 +1204,39 @@ xfs_buf_bio_end_io(
bio_put(bio);
}
-STATIC void
-_xfs_buf_ioapply(
- xfs_buf_t *bp)
+static void
+xfs_buf_ioapply_map(
+ struct xfs_buf *bp,
+ int map,
+ int *buf_offset,
+ int *count,
+ int rw)
{
- int rw, map_i, total_nr_pages, nr_pages;
- struct bio *bio;
- int offset = bp->b_offset;
- int size = BBTOB(bp->b_io_length);
- sector_t sector = bp->b_bn;
+ int page_index;
+ int total_nr_pages = bp->b_page_count;
+ int nr_pages;
+ struct bio *bio;
+ sector_t sector = bp->b_maps[map].bm_bn;
+ int size;
+ int offset;
total_nr_pages = bp->b_page_count;
- map_i = 0;
- if (bp->b_flags & XBF_WRITE) {
- if (bp->b_flags & XBF_SYNCIO)
- rw = WRITE_SYNC;
- else
- rw = WRITE;
- if (bp->b_flags & XBF_FUA)
- rw |= REQ_FUA;
- if (bp->b_flags & XBF_FLUSH)
- rw |= REQ_FLUSH;
- } else if (bp->b_flags & XBF_READ_AHEAD) {
- rw = READA;
- } else {
- rw = READ;
+ /* skip the pages in the buffer before the start offset */
+ page_index = 0;
+ offset = *buf_offset;
+ while (offset >= PAGE_SIZE) {
+ page_index++;
+ offset -= PAGE_SIZE;
}
- /* we only use the buffer cache for meta-data */
- rw |= REQ_META;
+ /*
+ * Limit the IO size to the length of the current vector, and update the
+ * remaining IO count for the next time around.
+ */
+ size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+ *count -= size;
+ *buf_offset += size;
next_chunk:
atomic_inc(&bp->b_io_remaining);
@@ -1188,13 +1251,14 @@ next_chunk:
bio->bi_private = bp;
- for (; size && nr_pages; nr_pages--, map_i++) {
+ for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
if (nbytes > size)
nbytes = size;
- rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+ rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+ offset);
if (rbytes < nbytes)
break;
@@ -1216,6 +1280,54 @@ next_chunk:
xfs_buf_ioerror(bp, EIO);
bio_put(bio);
}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+ struct xfs_buf *bp)
+{
+ struct blk_plug plug;
+ int rw;
+ int offset;
+ int size;
+ int i;
+
+ if (bp->b_flags & XBF_WRITE) {
+ if (bp->b_flags & XBF_SYNCIO)
+ rw = WRITE_SYNC;
+ else
+ rw = WRITE;
+ if (bp->b_flags & XBF_FUA)
+ rw |= REQ_FUA;
+ if (bp->b_flags & XBF_FLUSH)
+ rw |= REQ_FLUSH;
+ } else if (bp->b_flags & XBF_READ_AHEAD) {
+ rw = READA;
+ } else {
+ rw = READ;
+ }
+
+ /* we only use the buffer cache for meta-data */
+ rw |= REQ_META;
+
+ /*
+ * Walk all the vectors issuing IO on them. Set up the initial offset
+ * into the buffer and the desired IO size before we start -
+ * _xfs_buf_ioapply_vec() will modify them appropriately for each
+ * subsequent call.
+ */
+ offset = bp->b_offset;
+ size = BBTOB(bp->b_io_length);
+ blk_start_plug(&plug);
+ for (i = 0; i < bp->b_map_count; i++) {
+ xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+ if (bp->b_error)
+ break;
+ if (size <= 0)
+ break; /* all done */
+ }
+ blk_finish_plug(&plug);
}
void
@@ -1557,7 +1669,7 @@ xfs_buf_cmp(
struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
xfs_daddr_t diff;
- diff = ap->b_bn - bp->b_bn;
+ diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
if (diff < 0)
return -1;
if (diff > 0)