diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 13:39:11 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 13:39:11 -0800 |
commit | 9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (patch) | |
tree | e7c87b68daba7cf7ca4c342c6b52165bd78fbe16 /drivers/block/xen-blkback | |
parent | 9360b53661a2c7754517b2925580055bacc8ec38 (diff) | |
parent | d2ec180c23a5a1bfe34d8638b0342a47c00cf70f (diff) |
Merge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block
Pull block driver update from Jens Axboe:
"Now that the core bits are in, here are the driver bits for 3.8. The
branch contains:
- A huge pile of drbd bits that were dumped from the 3.7 merge
window. Following that, it was both made perfectly clear that
there is going to be no more over-the-wall pulls and how the
situation on individual pulls can be improved.
- A few cleanups from Akinobu Mita for drbd and cciss.
- Queue improvement for loop from Lukas. This grew into adding a
generic interface for waiting/checking an event with a specific
lock, allowing this to be pulled out of md and now loop and drbd are
also using it.
- A few fixes for xen back/front block driver from Roger Pau Monne.
- Partition improvements from Stephen Warren, allowing partition UUID
to be used as an identifier."
* 'for-3.8/drivers' of git://git.kernel.dk/linux-block: (609 commits)
drbd: update Kconfig to match current dependencies
drbd: Fix drbdsetup wait-connect, wait-sync etc... commands
drbd: close race between drbd_set_role and drbd_connect
drbd: respect no-md-barriers setting also when changed online via disk-options
drbd: Remove obsolete check
drbd: fixup after wait_even_lock_irq() addition to generic code
loop: Limit the number of requests in the bio list
wait: add wait_event_lock_irq() interface
xen-blkfront: free allocated page
xen-blkback: move free persistent grants code
block: partition: msdos: provide UUIDs for partitions
init: reduce PARTUUID min length to 1 from 36
block: store partition_meta_info.uuid as a string
cciss: use check_signature()
cciss: cleanup bitops usage
drbd: use copy_highpage
drbd: if the replication link breaks during handshake, keep retrying
drbd: check return of kmalloc in receive_uuids
drbd: Broadcast sync progress no more often than once per second
drbd: don't try to clear bits once the disk has failed
...
Diffstat (limited to 'drivers/block/xen-blkback')
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 301 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 16 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 23 |
3 files changed, 313 insertions, 27 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 280a13846e6..74374fb762a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,6 +39,7 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/freezer.h> +#include <linux/bitmap.h> #include <xen/events.h> #include <xen/page.h> @@ -79,6 +80,7 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; + DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); }; #define BLKBACK_INVALID_HANDLE (~0) @@ -99,6 +101,36 @@ struct xen_blkbk { static struct xen_blkbk *blkbk; /* + * Maximum number of grant pages that can be mapped in blkback. + * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of + * pages that blkback will persistently map. + * Currently, this is: + * RING_SIZE = 32 (for all known ring types) + * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 + * sizeof(struct persistent_gnt) = 48 + * So the maximum memory used to store the grants is: + * 32 * 11 * 48 = 16896 bytes + */ +static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) +{ + switch (protocol) { + case BLKIF_PROTOCOL_NATIVE: + return __CONST_RING_SIZE(blkif, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_32: + return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_64: + return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + default: + BUG(); + } + return 0; +} + + +/* * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. 
The seg would be from 0 through @@ -129,6 +161,90 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); +#define foreach_grant(pos, rbtree, node) \ + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ + &(pos)->node != NULL; \ + (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) + + +static void add_persistent_gnt(struct rb_root *root, + struct persistent_gnt *persistent_gnt) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct persistent_gnt *this; + + /* Figure out where to put new node */ + while (*new) { + this = container_of(*new, struct persistent_gnt, node); + + parent = *new; + if (persistent_gnt->gnt < this->gnt) + new = &((*new)->rb_left); + else if (persistent_gnt->gnt > this->gnt) + new = &((*new)->rb_right); + else { + pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); + BUG(); + } + } + + /* Add new node and rebalance tree. 
*/ + rb_link_node(&(persistent_gnt->node), parent, new); + rb_insert_color(&(persistent_gnt->node), root); +} + +static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, + grant_ref_t gref) +{ + struct persistent_gnt *data; + struct rb_node *node = root->rb_node; + + while (node) { + data = container_of(node, struct persistent_gnt, node); + + if (gref < data->gnt) + node = node->rb_left; + else if (gref > data->gnt) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void free_persistent_gnts(struct rb_root *root, unsigned int num) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret = 0; + int segs_to_unmap = 0; + + foreach_grant(persistent_gnt, root, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + gnttab_set_unmap_op(&unmap[segs_to_unmap], + (unsigned long) pfn_to_kaddr(page_to_pfn( + persistent_gnt->page)), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + rb_erase(&persistent_gnt->node, root); + kfree(persistent_gnt); + num--; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || + !rb_next(&persistent_gnt->node)) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + segs_to_unmap = 0; + } + } + BUG_ON(num != 0); +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. 
*/ @@ -302,6 +418,14 @@ int xen_blkif_schedule(void *arg) print_stats(blkif); } + /* Free all persistent grant pages */ + if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) + free_persistent_gnts(&blkif->persistent_gnts, + blkif->persistent_gnt_c); + + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + blkif->persistent_gnt_c = 0; + if (log_stats) print_stats(blkif); @@ -328,6 +452,8 @@ static void xen_blkbk_unmap(struct pending_req *req) int ret; for (i = 0; i < req->nr_pages; i++) { + if (!test_bit(i, req->unmap_seg)) + continue; handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; @@ -344,12 +470,26 @@ static void xen_blkbk_unmap(struct pending_req *req) static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) + struct seg_buf seg[], + struct page *pages[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; + struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt = NULL; + struct xen_blkif *blkif = pending_req->blkif; + phys_addr_t addr = 0; + int i, j; + bool new_map; int nseg = req->u.rw.nr_segments; + int segs_to_map = 0; int ret = 0; + int use_persistent_gnts; + + use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); + + BUG_ON(blkif->persistent_gnt_c > + max_mapped_grant_pages(pending_req->blkif->blk_protocol)); /* * Fill out preq.nr_sects with proper amount of sectors, and setup @@ -359,36 +499,146 @@ static int xen_blkbk_map(struct blkif_request *req, for (i = 0; i < nseg; i++) { uint32_t flags; - flags = GNTMAP_host_map; - if (pending_req->operation != BLKIF_OP_READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, - pending_req->blkif->domid); + if (use_persistent_gnts) + persistent_gnt = get_persistent_gnt( + &blkif->persistent_gnts, + req->u.rw.seg[i].gref); + + if 
(persistent_gnt) { + /* + * We are using persistent grants and + * the grant is already mapped + */ + new_map = false; + } else if (use_persistent_gnts && + blkif->persistent_gnt_c < + max_mapped_grant_pages(blkif->blk_protocol)) { + /* + * We are using persistent grants, the grant is + * not mapped but we have room for it + */ + new_map = true; + persistent_gnt = kmalloc( + sizeof(struct persistent_gnt), + GFP_KERNEL); + if (!persistent_gnt) + return -ENOMEM; + persistent_gnt->page = alloc_page(GFP_KERNEL); + if (!persistent_gnt->page) { + kfree(persistent_gnt); + return -ENOMEM; + } + persistent_gnt->gnt = req->u.rw.seg[i].gref; + persistent_gnt->handle = BLKBACK_INVALID_HANDLE; + + pages_to_gnt[segs_to_map] = + persistent_gnt->page; + addr = (unsigned long) pfn_to_kaddr( + page_to_pfn(persistent_gnt->page)); + + add_persistent_gnt(&blkif->persistent_gnts, + persistent_gnt); + blkif->persistent_gnt_c++; + pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + persistent_gnt->gnt, blkif->persistent_gnt_c, + max_mapped_grant_pages(blkif->blk_protocol)); + } else { + /* + * We are either using persistent grants and + * hit the maximum limit of grants mapped, + * or we are not using persistent grants. 
+ */ + if (use_persistent_gnts && + !blkif->vbd.overflow_max_grants) { + blkif->vbd.overflow_max_grants = 1; + pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + blkif->domid, blkif->vbd.handle); + } + new_map = true; + pages[i] = blkbk->pending_page(pending_req, i); + addr = vaddr(pending_req, i); + pages_to_gnt[segs_to_map] = + blkbk->pending_page(pending_req, i); + } + + if (persistent_gnt) { + pages[i] = persistent_gnt->page; + persistent_gnts[i] = persistent_gnt; + } else { + persistent_gnts[i] = NULL; + } + + if (new_map) { + flags = GNTMAP_host_map; + if (!persistent_gnt && + (pending_req->operation != BLKIF_OP_READ)) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[segs_to_map++], addr, + flags, req->u.rw.seg[i].gref, + blkif->domid); + } } - ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg); - BUG_ON(ret); + if (segs_to_map) { + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); + BUG_ON(ret); + } /* * Now swizzle the MFN in our domain with the MFN from the other domain * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. 
*/ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + for (i = 0, j = 0; i < nseg; i++) { + if (!persistent_gnts[i] || + persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) { + /* This is a newly mapped grant */ + BUG_ON(j >= segs_to_map); + if (unlikely(map[j].status != 0)) { + pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + map[j].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + if (persistent_gnts[i]) { + rb_erase(&persistent_gnts[i]->node, + &blkif->persistent_gnts); + blkif->persistent_gnt_c--; + kfree(persistent_gnts[i]); + persistent_gnts[i] = NULL; + } + } + } + if (persistent_gnts[i]) { + if (persistent_gnts[i]->handle == + BLKBACK_INVALID_HANDLE) { + /* + * If this is a new persistent grant + * save the handler + */ + persistent_gnts[i]->handle = map[j].handle; + persistent_gnts[i]->dev_bus_addr = + map[j++].dev_bus_addr; + } + pending_handle(pending_req, i) = + persistent_gnts[i]->handle; + + if (ret) + continue; + + seg[i].buf = persistent_gnts[i]->dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } else { + pending_handle(pending_req, i) = map[j].handle; + bitmap_set(pending_req->unmap_seg, i, 1); + + if (ret) { + j++; + continue; + } + + seg[i].buf = map[j++].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } return ret; } @@ -591,6 +841,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int operation; struct blk_plug plug; bool drain = false; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; switch (req->operation) { case BLKIF_OP_READ: @@ -677,7 +928,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is 
all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg, pages)) goto fail_flush; /* @@ -689,7 +940,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { while ((bio == NULL) || (bio_add_page(bio, - blkbk->pending_page(pending_req, i), + pages[i], seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9a54623e52d..6072390c7f5 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -34,6 +34,7 @@ #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/io.h> +#include <linux/rbtree.h> #include <asm/setup.h> #include <asm/pgalloc.h> #include <asm/hypervisor.h> @@ -160,10 +161,21 @@ struct xen_vbd { sector_t size; unsigned int flush_support:1; unsigned int discard_secure:1; + unsigned int feature_gnt_persistent:1; + unsigned int overflow_max_grants:1; }; struct backend_info; + +struct persistent_gnt { + struct page *page; + grant_ref_t gnt; + grant_handle_t handle; + uint64_t dev_bus_addr; + struct rb_node node; +}; + struct xen_blkif { /* Unique identifier for this interface. 
*/ domid_t domid; @@ -190,6 +202,10 @@ struct xen_blkif { struct task_struct *xenblkd; unsigned int waiting_reqs; + /* tree to store persistent grants */ + struct rb_root persistent_gnts; + unsigned int persistent_gnt_c; + /* statistics */ unsigned long st_print; int st_rd_req; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f58434c2617..63980722db4 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -117,6 +117,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); + blkif->persistent_gnts.rb_node = NULL; return blkif; } @@ -672,6 +673,13 @@ again: xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); + err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/feature-persistent", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { @@ -720,6 +728,7 @@ static int connect_ring(struct backend_info *be) struct xenbus_device *dev = be->dev; unsigned long ring_ref; unsigned int evtchn; + unsigned int pers_grants; char protocol[64] = ""; int err; @@ -749,8 +758,18 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + err = xenbus_gather(XBT_NIL, dev->otherend, + "feature-persistent", "%u", + &pers_grants, NULL); + if (err) + pers_grants = 0; + + be->blkif->vbd.feature_gnt_persistent = pers_grants; + be->blkif->vbd.overflow_max_grants = 0; + + pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pers_grants ? 
"persistent grants" : ""); /* Map the shared frame, irq etc. */ err = xen_blkif_map(be->blkif, ring_ref, evtchn); |