diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 292 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 17 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 23 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 197 |
4 files changed, 474 insertions, 55 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 280a13846e6..d7dd5cbdac5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,6 +39,7 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/freezer.h> +#include <linux/bitmap.h> #include <xen/events.h> #include <xen/page.h> @@ -79,6 +80,7 @@ struct pending_req { unsigned short operation; int status; struct list_head free_list; + DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); }; #define BLKBACK_INVALID_HANDLE (~0) @@ -99,6 +101,36 @@ struct xen_blkbk { static struct xen_blkbk *blkbk; /* + * Maximum number of grant pages that can be mapped in blkback. + * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of + * pages that blkback will persistently map. + * Currently, this is: + * RING_SIZE = 32 (for all known ring types) + * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 + * sizeof(struct persistent_gnt) = 48 + * So the maximum memory used to store the grants is: + * 32 * 11 * 48 = 16896 bytes + */ +static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) +{ + switch (protocol) { + case BLKIF_PROTOCOL_NATIVE: + return __CONST_RING_SIZE(blkif, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_32: + return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + case BLKIF_PROTOCOL_X86_64: + return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * + BLKIF_MAX_SEGMENTS_PER_REQUEST; + default: + BUG(); + } + return 0; +} + + +/* * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through @@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); +#define foreach_grant(pos, rbtree, node) \ + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ + &(pos)->node != NULL; \ + (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) + + +static void add_persistent_gnt(struct rb_root *root, + struct persistent_gnt *persistent_gnt) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct persistent_gnt *this; + + /* Figure out where to put new node */ + while (*new) { + this = container_of(*new, struct persistent_gnt, node); + + parent = *new; + if (persistent_gnt->gnt < this->gnt) + new = &((*new)->rb_left); + else if (persistent_gnt->gnt > this->gnt) + new = &((*new)->rb_right); + else { + pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); + BUG(); + } + } + + /* Add new node and rebalance tree. */ + rb_link_node(&(persistent_gnt->node), parent, new); + rb_insert_color(&(persistent_gnt->node), root); +} + +static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, + grant_ref_t gref) +{ + struct persistent_gnt *data; + struct rb_node *node = root->rb_node; + + while (node) { + data = container_of(node, struct persistent_gnt, node); + + if (gref < data->gnt) + node = node->rb_left; + else if (gref > data->gnt) + node = node->rb_right; + else + return data; + } + return NULL; +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ @@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret = 0; + int segs_to_unmap = 0; xen_blkif_get(blkif); @@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg) print_stats(blkif); } + /* Free all persistent grant pages */ + if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) { + foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + gnttab_set_unmap_op(&unmap[segs_to_unmap], + (unsigned long) pfn_to_kaddr(page_to_pfn( + persistent_gnt->page)), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + rb_erase(&persistent_gnt->node, + &blkif->persistent_gnts); + kfree(persistent_gnt); + blkif->persistent_gnt_c--; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || + !rb_next(&persistent_gnt->node)) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + segs_to_unmap = 0; + } + } + } + + BUG_ON(blkif->persistent_gnt_c != 0); + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + if (log_stats) print_stats(blkif); @@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req) int ret; for (i = 0; i < req->nr_pages; i++) { + if (!test_bit(i, req->unmap_seg)) + continue; handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; @@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req) static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) + struct seg_buf seg[], + struct page *pages[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; + struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt = NULL; + struct xen_blkif *blkif = pending_req->blkif; + phys_addr_t addr = 0; + int i, j; + bool new_map; int nseg = req->u.rw.nr_segments; + int segs_to_map = 0; int ret = 0; + int use_persistent_gnts; + + use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); + + BUG_ON(blkif->persistent_gnt_c > + max_mapped_grant_pages(pending_req->blkif->blk_protocol)); /* * Fill out preq.nr_sects with proper amount of sectors, and setup @@ -359,36 +493,143 @@ static int xen_blkbk_map(struct blkif_request *req, for (i = 0; i < nseg; i++) { uint32_t flags; - flags = GNTMAP_host_map; - if (pending_req->operation != BLKIF_OP_READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, - pending_req->blkif->domid); + if (use_persistent_gnts) + persistent_gnt = get_persistent_gnt( + &blkif->persistent_gnts, + req->u.rw.seg[i].gref); + + if (persistent_gnt) { + /* + * We are using persistent grants and + * the grant is already mapped + */ + new_map = false; + } else if (use_persistent_gnts && + blkif->persistent_gnt_c < + max_mapped_grant_pages(blkif->blk_protocol)) { + /* + * We are using persistent grants, the grant is + * not mapped but we have room for it + */ + new_map = true; + persistent_gnt = kzalloc( + sizeof(struct persistent_gnt), + GFP_KERNEL); + if (!persistent_gnt) + return -ENOMEM; + persistent_gnt->page = alloc_page(GFP_KERNEL); + if (!persistent_gnt->page) { + kfree(persistent_gnt); + return -ENOMEM; + } + persistent_gnt->gnt = req->u.rw.seg[i].gref; + + pages_to_gnt[segs_to_map] = + persistent_gnt->page; + addr = (unsigned long) pfn_to_kaddr( + page_to_pfn(persistent_gnt->page)); + + add_persistent_gnt(&blkif->persistent_gnts, + persistent_gnt); + blkif->persistent_gnt_c++; + pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + persistent_gnt->gnt, blkif->persistent_gnt_c, + max_mapped_grant_pages(blkif->blk_protocol)); + } else { + /* + * We are either using persistent grants and + * hit the maximum limit of grants mapped, + * or we are not using persistent grants. + */ + if (use_persistent_gnts && + !blkif->vbd.overflow_max_grants) { + blkif->vbd.overflow_max_grants = 1; + pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + blkif->domid, blkif->vbd.handle); + } + new_map = true; + pages[i] = blkbk->pending_page(pending_req, i); + addr = vaddr(pending_req, i); + pages_to_gnt[segs_to_map] = + blkbk->pending_page(pending_req, i); + } + + if (persistent_gnt) { + pages[i] = persistent_gnt->page; + persistent_gnts[i] = persistent_gnt; + } else { + persistent_gnts[i] = NULL; + } + + if (new_map) { + flags = GNTMAP_host_map; + if (!persistent_gnt && + (pending_req->operation != BLKIF_OP_READ)) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[segs_to_map++], addr, + flags, req->u.rw.seg[i].gref, + blkif->domid); + } } - ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg); - BUG_ON(ret); + if (segs_to_map) { + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); + BUG_ON(ret); + } /* * Now swizzle the MFN in our domain with the MFN from the other domain * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. */ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + for (i = 0, j = 0; i < nseg; i++) { + if (!persistent_gnts[i] || !persistent_gnts[i]->handle) { + /* This is a newly mapped grant */ + BUG_ON(j >= segs_to_map); + if (unlikely(map[j].status != 0)) { + pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + map[j].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + if (persistent_gnts[i]) { + rb_erase(&persistent_gnts[i]->node, + &blkif->persistent_gnts); + blkif->persistent_gnt_c--; + kfree(persistent_gnts[i]); + persistent_gnts[i] = NULL; + } + } + } + if (persistent_gnts[i]) { + if (!persistent_gnts[i]->handle) { + /* + * If this is a new persistent grant + * save the handler + */ + persistent_gnts[i]->handle = map[j].handle; + persistent_gnts[i]->dev_bus_addr = + map[j++].dev_bus_addr; + } + pending_handle(pending_req, i) = + persistent_gnts[i]->handle; + + if (ret) + continue; + + seg[i].buf = persistent_gnts[i]->dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } else { + pending_handle(pending_req, i) = map[j].handle; + bitmap_set(pending_req->unmap_seg, i, 1); + + if (ret) { + j++; + continue; + } + + seg[i].buf = map[j++].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } return ret; } @@ -591,6 +832,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int operation; struct blk_plug plug; bool drain = false; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; switch (req->operation) { case BLKIF_OP_READ: @@ -677,7 +919,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg, pages)) goto fail_flush; /* @@ -689,7 +931,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { while ((bio == NULL) || (bio_add_page(bio, - blkbk->pending_page(pending_req, i), + pages[i], seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9ad3b5ec1dc..ae7951f0e26 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -34,6 +34,7 @@ #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/io.h> +#include <linux/rbtree.h> #include <asm/setup.h> #include <asm/pgalloc.h> #include <asm/hypervisor.h> @@ -160,10 +161,22 @@ struct xen_vbd { sector_t size; bool flush_support; bool discard_secure; + + unsigned int feature_gnt_persistent:1; + unsigned int overflow_max_grants:1; }; struct backend_info; + +struct persistent_gnt { + struct page *page; + grant_ref_t gnt; + grant_handle_t handle; + uint64_t dev_bus_addr; + struct rb_node node; +}; + struct xen_blkif { /* Unique identifier for this interface. */ domid_t domid; @@ -190,6 +203,10 @@ struct xen_blkif { struct task_struct *xenblkd; unsigned int waiting_reqs; + /* tree to store persistent grants */ + struct rb_root persistent_gnts; + unsigned int persistent_gnt_c; + /* statistics */ unsigned long st_print; int st_rd_req; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4f66171c668..b2250265308 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); + blkif->persistent_gnts.rb_node = NULL; return blkif; } @@ -673,6 +674,13 @@ again: xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); + err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/feature-persistent", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { @@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be) struct xenbus_device *dev = be->dev; unsigned long ring_ref; unsigned int evtchn; + unsigned int pers_grants; char protocol[64] = ""; int err; @@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + err = xenbus_gather(XBT_NIL, dev->otherend, + "feature-persistent-grants", "%u", + &pers_grants, NULL); + if (err) + pers_grants = 0; + + be->blkif->vbd.feature_gnt_persistent = pers_grants; + be->blkif->vbd.overflow_max_grants = 0; + + pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pers_grants ? "persistent grants" : ""); /* Map the shared frame, irq etc. */ err = xen_blkif_map(be->blkif, ring_ref, evtchn); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 007db8986e8..911d733d21b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,6 +44,7 @@ #include <linux/mutex.h> #include <linux/scatterlist.h> #include <linux/bitmap.h> +#include <linux/llist.h> #include <xen/xen.h> #include <xen/xenbus.h> @@ -64,10 +65,17 @@ enum blkif_state { BLKIF_STATE_SUSPENDED, }; +struct grant { + grant_ref_t gref; + unsigned long pfn; + struct llist_node node; +}; + struct blk_shadow { struct blkif_request req; struct request *request; unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; static DEFINE_MUTEX(blkfront_mutex); @@ -97,6 +105,8 @@ struct blkfront_info struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; + struct llist_head persistent_gnts; + unsigned int persistent_gnts_c; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -104,6 +114,7 @@ struct blkfront_info unsigned int feature_secdiscard:1; unsigned int discard_granularity; unsigned int discard_alignment; + unsigned int feature_persistent:1; int is_ready; }; @@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req) unsigned long id; unsigned int fsect, lsect; int i, ref; + + /* + * Used to store if we are able to queue the request by just using + * existing persistent grants, or if we have to get new grants, + * as there are not sufficiently many free. + */ + bool new_persistent_gnts; grant_ref_t gref_head; + struct page *granted_page; + struct grant *gnt_list_entry = NULL; struct scatterlist *sg; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; - if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { - gnttab_request_free_callback( - &info->callback, - blkif_restart_queue_callback, - info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - return 1; - } + /* Check if we have enought grants to allocate a requests */ + if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) { + new_persistent_gnts = 1; + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c, + &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + } else + new_persistent_gnts = 0; /* Fill out a communications ring structure. */ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); @@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req) BLKIF_MAX_SEGMENTS_PER_REQUEST); for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { - buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); - gnttab_grant_foreign_access_ref( - ref, + if (info->persistent_gnts_c) { + BUG_ON(llist_empty(&info->persistent_gnts)); + gnt_list_entry = llist_entry( + llist_del_first(&info->persistent_gnts), + struct grant, node); + + ref = gnt_list_entry->gref; + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); + info->persistent_gnts_c--; + } else { + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnt_list_entry = + kmalloc(sizeof(struct grant), + GFP_ATOMIC); + if (!gnt_list_entry) + return -ENOMEM; + + granted_page = alloc_page(GFP_ATOMIC); + if (!granted_page) { + kfree(gnt_list_entry); + return -ENOMEM; + } + + gnt_list_entry->pfn = + page_to_pfn(granted_page); + gnt_list_entry->gref = ref; + + buffer_mfn = pfn_to_mfn(page_to_pfn( + granted_page)); + gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id, - buffer_mfn, - rq_data_dir(req)); + buffer_mfn, 0); + } + + info->shadow[id].grants_used[i] = gnt_list_entry; + + if (rq_data_dir(req)) { + char *bvec_data; + void *shared_data; + + BUG_ON(sg->offset + sg->length > PAGE_SIZE); + + shared_data = kmap_atomic( + pfn_to_page(gnt_list_entry->pfn)); + bvec_data = kmap_atomic(sg_page(sg)); + + /* + * this does not wipe data stored outside the + * range sg->offset..sg->offset+sg->length. + * Therefore, blkback *could* see data from + * previous requests. This is OK as long as + * persistent grants are shared with just one + * domain. It may need refactoring if this + * changes + */ + memcpy(shared_data + sg->offset, + bvec_data + sg->offset, + sg->length); + + kunmap_atomic(bvec_data); + kunmap_atomic(shared_data); + } info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); ring_req->u.rw.seg[i] = @@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req) /* Keep a private copy so we can reissue requests when recovering. */ info->shadow[id].req = *ring_req; - gnttab_free_grant_references(gref_head); + if (new_persistent_gnts) + gnttab_free_grant_references(gref_head); return 0; } @@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static void xlvbd_flush(struct blkfront_info *info) { blk_queue_flush(info->rq, info->feature_flush); - printk(KERN_INFO "blkfront: %s: %s: %s\n", + printk(KERN_INFO "blkfront: %s: %s: %s %s\n", info->gd->disk_name, info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? "flush diskcache" : "barrier or flush"), - info->feature_flush ? "enabled" : "disabled"); + info->feature_flush ? "enabled" : "disabled", + info->feature_persistent ? "using persistent grants" : ""); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + struct llist_node *all_gnts; + struct grant *persistent_gnt; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); info->connected = suspend ? @@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* No more blkif_request(). */ if (info->rq) blk_stop_queue(info->rq); + + /* Remove all persistent grants */ + if (info->persistent_gnts_c) { + all_gnts = llist_del_all(&info->persistent_gnts); + llist_for_each_entry(persistent_gnt, all_gnts, node) { + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + kfree(persistent_gnt); + } + info->persistent_gnts_c = 0; + } + /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); spin_unlock_irq(&info->io_lock); @@ -734,13 +831,42 @@ static void blkif_free(struct blkfront_info *info, int suspend) } -static void blkif_completion(struct blk_shadow *s) +static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, + struct blkif_response *bret) { int i; - /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place - * flag. */ - for (i = 0; i < s->req.u.rw.nr_segments; i++) - gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); + struct bio_vec *bvec; + struct req_iterator iter; + unsigned long flags; + char *bvec_data; + void *shared_data; + unsigned int offset = 0; + + if (bret->operation == BLKIF_OP_READ) { + /* + * Copy the data received from the backend into the bvec. + * Since bv_offset can be different than 0, and bv_len different + * than PAGE_SIZE, we have to keep track of the current offset, + * to be sure we are copying the data from the right shared page. + */ + rq_for_each_segment(bvec, s->request, iter) { + BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); + i = offset >> PAGE_SHIFT; + shared_data = kmap_atomic( + pfn_to_page(s->grants_used[i]->pfn)); + bvec_data = bvec_kmap_irq(bvec, &flags); + memcpy(bvec_data, shared_data + bvec->bv_offset, + bvec->bv_len); + bvec_kunmap_irq(bvec_data, &flags); + kunmap_atomic(shared_data); + offset += bvec->bv_len; + } + } + /* Add the persistent grant into the list of free grants */ + for (i = 0; i < s->req.u.rw.nr_segments; i++) { + llist_add(&s->grants_used[i]->node, &info->persistent_gnts); + info->persistent_gnts_c++; + } } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -783,7 +909,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) req = info->shadow[id].request; if (bret->operation != BLKIF_OP_DISCARD) - blkif_completion(&info->shadow[id]); + blkif_completion(&info->shadow[id], info, bret); if (add_id_to_freelist(info, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", @@ -942,6 +1068,11 @@ again: message = "writing protocol"; goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, + "feature-persistent-grants", "%u", 1); + if (err) + dev_warn(&dev->dev, + "writing persistent grants feature to xenbus"); err = xenbus_transaction_end(xbt, 0); if (err) { @@ -1029,6 +1160,8 @@ static int blkfront_probe(struct xenbus_device *dev, spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; + init_llist_head(&info->persistent_gnts); + info->persistent_gnts_c = 0; info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); @@ -1093,7 +1226,7 @@ static int blkif_recover(struct blkfront_info *info) req->u.rw.seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), - rq_data_dir(info->shadow[req->u.rw.id].request)); + 0); } info->shadow[req->u.rw.id].req = *req; @@ -1225,7 +1358,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; - int barrier, flush, discard; + int barrier, flush, discard, persistent; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -1303,6 +1436,14 @@ static void blkfront_connect(struct blkfront_info *info) if (!err && discard) blkfront_setup_discard(info); + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%u", &persistent, + NULL); + if (err) + info->feature_persistent = 0; + else + info->feature_persistent = persistent; + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", |