diff options
author | Sarah Sharp <sarah.a.sharp@linux.intel.com> | 2010-04-02 15:34:16 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2010-05-20 13:21:38 -0700 |
commit | 8df75f42f8e67e2851cdcf6da91640fb881defd1 (patch) | |
tree | 8af91f0a691d1b76f0298480e84fb77c394c24dc /drivers/usb/host/xhci-mem.c | |
parent | 94af1220985c71cd80d6c161b7a42c51ef08b923 (diff) |
USB: xhci: Add memory allocation for USB3 bulk streams.
Add support for allocating streams for USB 3.0 bulk endpoints. See
Documentation/usb/bulk-streams.txt for more information about how and why
you would use streams.
When an endpoint has streams enabled, instead of having one ring where all
transfers are enqueued to the hardware, it has several rings. The ring
dequeue pointer in the endpoint context is changed to point to a "Stream
Context Array". This is basically an array of pointers to transfer rings,
one for each stream ID that the driver wants to use.
The Stream Context Array size must be a power of two, and host controllers
can place a limit on the size of the array (4 to 2^16 entries). These
two facts make calculating the size of the Stream Context Array and the
number of entries actually used by the driver a bit tricky.
Besides the Stream Context Array and rings for all the stream IDs, we need
one more data structure. The xHCI hardware will not tell us which stream
ID a transfer event was for, but it will give us the slot ID, endpoint
index, and physical address for the TRB that caused the event. For every
endpoint on a device, add a radix tree to map physical TRB addresses to
virtual segments within a stream ring.
Keep track of whether an endpoint is transitioning to using streams, and
don't enqueue any URBs while that's taking place. Refuse to transition an
endpoint to streams if there are already URBs enqueued for that endpoint.
We need to make sure that freeing streams does not fail, since a driver's
disconnect() function may attempt to do this, and it cannot fail.
Pre-allocate the command structure used to issue the Configure Endpoint
command, and reserve space on the command ring for each stream endpoint.
This may be a bit overkill, but it is permissible for the driver to
allocate all streams in one call and free them in multiple calls. (It is
not advised, however, since it is a waste of resources and time.)
Even with the memory and ring room pre-allocated, freeing streams can
still fail because the xHC rejects the configure endpoint command. It is
valid (by the xHCI 0.96 spec) to return a "Bandwidth Error" or a "Resource
Error" for a configure endpoint command. We should never see a Bandwidth
Error, since bulk endpoints do not effect the reserved bandwidth. The
host controller can still return a Resource Error, but it's improbable
since the xHC would be going from a more resource-intensive configuration
(streams) to a less resource-intensive configuration (no streams).
If the xHC returns a Resource Error, the endpoint will be stuck with
streams and will be unusable for drivers. It's an unavoidable consequence
of broken host controller hardware.
Includes bug fixes from the original patch, contributed by
John Youn <John.Youn@synopsys.com> and Andy Green <AGreen@PLXTech.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'drivers/usb/host/xhci-mem.c')
-rw-r--r-- | drivers/usb/host/xhci-mem.c | 379 |
1 files changed, 378 insertions, 1 deletions
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d64f5724bfc..d299ffad806 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -304,6 +304,350 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci, (ctx->bytes + (ep_index * CTX_SIZE(xhci->hcc_params))); } + +/***************** Streams structures manipulation *************************/ + +void xhci_free_stream_ctx(struct xhci_hcd *xhci, + unsigned int num_stream_ctxs, + struct xhci_stream_ctx *stream_ctx, dma_addr_t dma) +{ + struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); + + if (num_stream_ctxs > MEDIUM_STREAM_ARRAY_SIZE) + pci_free_consistent(pdev, + sizeof(struct xhci_stream_ctx)*num_stream_ctxs, + stream_ctx, dma); + else if (num_stream_ctxs <= SMALL_STREAM_ARRAY_SIZE) + return dma_pool_free(xhci->small_streams_pool, + stream_ctx, dma); + else + return dma_pool_free(xhci->medium_streams_pool, + stream_ctx, dma); +} + +/* + * The stream context array for each endpoint with bulk streams enabled can + * vary in size, based on: + * - how many streams the endpoint supports, + * - the maximum primary stream array size the host controller supports, + * - and how many streams the device driver asks for. + * + * The stream context array must be a power of 2, and can be as small as + * 64 bytes or as large as 1MB. + */ +struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci, + unsigned int num_stream_ctxs, dma_addr_t *dma, + gfp_t mem_flags) +{ + struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); + + if (num_stream_ctxs > MEDIUM_STREAM_ARRAY_SIZE) + return pci_alloc_consistent(pdev, + sizeof(struct xhci_stream_ctx)*num_stream_ctxs, + dma); + else if (num_stream_ctxs <= SMALL_STREAM_ARRAY_SIZE) + return dma_pool_alloc(xhci->small_streams_pool, + mem_flags, dma); + else + return dma_pool_alloc(xhci->medium_streams_pool, + mem_flags, dma); +} + +#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING +struct xhci_ring *dma_to_stream_ring( + struct xhci_stream_info *stream_info, + u64 address) +{ + return radix_tree_lookup(&stream_info->trb_address_map, + address >> SEGMENT_SHIFT); +} +#endif /* CONFIG_USB_XHCI_HCD_DEBUGGING */ + +#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING +static int xhci_test_radix_tree(struct xhci_hcd *xhci, + unsigned int num_streams, + struct xhci_stream_info *stream_info) +{ + u32 cur_stream; + struct xhci_ring *cur_ring; + u64 addr; + + for (cur_stream = 1; cur_stream < num_streams; cur_stream++) { + struct xhci_ring *mapped_ring; + int trb_size = sizeof(union xhci_trb); + + cur_ring = stream_info->stream_rings[cur_stream]; + for (addr = cur_ring->first_seg->dma; + addr < cur_ring->first_seg->dma + SEGMENT_SIZE; + addr += trb_size) { + mapped_ring = dma_to_stream_ring(stream_info, addr); + if (cur_ring != mapped_ring) { + xhci_warn(xhci, "WARN: DMA address 0x%08llx " + "didn't map to stream ID %u; " + "mapped to ring %p\n", + (unsigned long long) addr, + cur_stream, + mapped_ring); + return -EINVAL; + } + } + /* One TRB after the end of the ring segment shouldn't return a + * pointer to the current ring (although it may be a part of a + * different ring). + */ + mapped_ring = dma_to_stream_ring(stream_info, addr); + if (mapped_ring != cur_ring) { + /* One TRB before should also fail */ + addr = cur_ring->first_seg->dma - trb_size; + mapped_ring = dma_to_stream_ring(stream_info, addr); + } + if (mapped_ring == cur_ring) { + xhci_warn(xhci, "WARN: Bad DMA address 0x%08llx " + "mapped to valid stream ID %u; " + "mapped ring = %p\n", + (unsigned long long) addr, + cur_stream, + mapped_ring); + return -EINVAL; + } + } + return 0; +} +#endif /* CONFIG_USB_XHCI_HCD_DEBUGGING */ + +/* + * Change an endpoint's internal structure so it supports stream IDs. The + * number of requested streams includes stream 0, which cannot be used by device + * drivers. + * + * The number of stream contexts in the stream context array may be bigger than + * the number of streams the driver wants to use. This is because the number of + * stream context array entries must be a power of two. + * + * We need a radix tree for mapping physical addresses of TRBs to which stream + * ID they belong to. We need to do this because the host controller won't tell + * us which stream ring the TRB came from. We could store the stream ID in an + * event data TRB, but that doesn't help us for the cancellation case, since the + * endpoint may stop before it reaches that event data TRB. + * + * The radix tree maps the upper portion of the TRB DMA address to a ring + * segment that has the same upper portion of DMA addresses. For example, say I + * have segments of size 1KB, that are always 64-byte aligned. A segment may + * start at 0x10c91000 and end at 0x10c913f0. If I use the upper 10 bits, the + * key to the stream ID is 0x43244. I can use the DMA address of the TRB to + * pass the radix tree a key to get the right stream ID: + * + * 0x10c90fff >> 10 = 0x43243 + * 0x10c912c0 >> 10 = 0x43244 + * 0x10c91400 >> 10 = 0x43245 + * + * Obviously, only those TRBs with DMA addresses that are within the segment + * will make the radix tree return the stream ID for that ring. + * + * Caveats for the radix tree: + * + * The radix tree uses an unsigned long as a key pair. On 32-bit systems, an + * unsigned long will be 32-bits; on a 64-bit system an unsigned long will be + * 64-bits. Since we only request 32-bit DMA addresses, we can use that as the + * key on 32-bit or 64-bit systems (it would also be fine if we asked for 64-bit + * PCI DMA addresses on a 64-bit system). There might be a problem on 32-bit + * extended systems (where the DMA address can be bigger than 32-bits), + * if we allow the PCI dma mask to be bigger than 32-bits. So don't do that. + */ +struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci, + unsigned int num_stream_ctxs, + unsigned int num_streams, gfp_t mem_flags) +{ + struct xhci_stream_info *stream_info; + u32 cur_stream; + struct xhci_ring *cur_ring; + unsigned long key; + u64 addr; + int ret; + + xhci_dbg(xhci, "Allocating %u streams and %u " + "stream context array entries.\n", + num_streams, num_stream_ctxs); + if (xhci->cmd_ring_reserved_trbs == MAX_RSVD_CMD_TRBS) { + xhci_dbg(xhci, "Command ring has no reserved TRBs available\n"); + return NULL; + } + xhci->cmd_ring_reserved_trbs++; + + stream_info = kzalloc(sizeof(struct xhci_stream_info), mem_flags); + if (!stream_info) + goto cleanup_trbs; + + stream_info->num_streams = num_streams; + stream_info->num_stream_ctxs = num_stream_ctxs; + + /* Initialize the array of virtual pointers to stream rings. */ + stream_info->stream_rings = kzalloc( + sizeof(struct xhci_ring *)*num_streams, + mem_flags); + if (!stream_info->stream_rings) + goto cleanup_info; + + /* Initialize the array of DMA addresses for stream rings for the HW. */ + stream_info->stream_ctx_array = xhci_alloc_stream_ctx(xhci, + num_stream_ctxs, &stream_info->ctx_array_dma, + mem_flags); + if (!stream_info->stream_ctx_array) + goto cleanup_ctx; + memset(stream_info->stream_ctx_array, 0, + sizeof(struct xhci_stream_ctx)*num_stream_ctxs); + + /* Allocate everything needed to free the stream rings later */ + stream_info->free_streams_command = + xhci_alloc_command(xhci, true, true, mem_flags); + if (!stream_info->free_streams_command) + goto cleanup_ctx; + + INIT_RADIX_TREE(&stream_info->trb_address_map, GFP_ATOMIC); + + /* Allocate rings for all the streams that the driver will use, + * and add their segment DMA addresses to the radix tree. + * Stream 0 is reserved. + */ + for (cur_stream = 1; cur_stream < num_streams; cur_stream++) { + stream_info->stream_rings[cur_stream] = + xhci_ring_alloc(xhci, 1, true, mem_flags); + cur_ring = stream_info->stream_rings[cur_stream]; + if (!cur_ring) + goto cleanup_rings; + /* Set deq ptr, cycle bit, and stream context type */ + addr = cur_ring->first_seg->dma | + SCT_FOR_CTX(SCT_PRI_TR) | + cur_ring->cycle_state; + stream_info->stream_ctx_array[cur_stream].stream_ring = addr; + xhci_dbg(xhci, "Setting stream %d ring ptr to 0x%08llx\n", + cur_stream, (unsigned long long) addr); + + key = (unsigned long) + (cur_ring->first_seg->dma >> SEGMENT_SHIFT); + ret = radix_tree_insert(&stream_info->trb_address_map, + key, cur_ring); + if (ret) { + xhci_ring_free(xhci, cur_ring); + stream_info->stream_rings[cur_stream] = NULL; + goto cleanup_rings; + } + } + /* Leave the other unused stream ring pointers in the stream context + * array initialized to zero. This will cause the xHC to give us an + * error if the device asks for a stream ID we don't have setup (if it + * was any other way, the host controller would assume the ring is + * "empty" and wait forever for data to be queued to that stream ID). + */ +#if XHCI_DEBUG + /* Do a little test on the radix tree to make sure it returns the + * correct values. + */ + if (xhci_test_radix_tree(xhci, num_streams, stream_info)) + goto cleanup_rings; +#endif + + return stream_info; + +cleanup_rings: + for (cur_stream = 1; cur_stream < num_streams; cur_stream++) { + cur_ring = stream_info->stream_rings[cur_stream]; + if (cur_ring) { + addr = cur_ring->first_seg->dma; + radix_tree_delete(&stream_info->trb_address_map, + addr >> SEGMENT_SHIFT); + xhci_ring_free(xhci, cur_ring); + stream_info->stream_rings[cur_stream] = NULL; + } + } + xhci_free_command(xhci, stream_info->free_streams_command); +cleanup_ctx: + kfree(stream_info->stream_rings); +cleanup_info: + kfree(stream_info); +cleanup_trbs: + xhci->cmd_ring_reserved_trbs--; + return NULL; +} +/* + * Sets the MaxPStreams field and the Linear Stream Array field. + * Sets the dequeue pointer to the stream context array. + */ +void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci, + struct xhci_ep_ctx *ep_ctx, + struct xhci_stream_info *stream_info) +{ + u32 max_primary_streams; + /* MaxPStreams is the number of stream context array entries, not the + * number we're actually using. Must be in 2^(MaxPstreams + 1) format. + * fls(0) = 0, fls(0x1) = 1, fls(0x10) = 2, fls(0x100) = 3, etc. + */ + max_primary_streams = fls(stream_info->num_stream_ctxs) - 2; + xhci_dbg(xhci, "Setting number of stream ctx array entries to %u\n", + 1 << (max_primary_streams + 1)); + ep_ctx->ep_info &= ~EP_MAXPSTREAMS_MASK; + ep_ctx->ep_info |= EP_MAXPSTREAMS(max_primary_streams); + ep_ctx->ep_info |= EP_HAS_LSA; + ep_ctx->deq = stream_info->ctx_array_dma; +} + +/* + * Sets the MaxPStreams field and the Linear Stream Array field to 0. + * Reinstalls the "normal" endpoint ring (at its previous dequeue mark, + * not at the beginning of the ring). + */ +void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci, + struct xhci_ep_ctx *ep_ctx, + struct xhci_virt_ep *ep) +{ + dma_addr_t addr; + ep_ctx->ep_info &= ~EP_MAXPSTREAMS_MASK; + ep_ctx->ep_info &= ~EP_HAS_LSA; + addr = xhci_trb_virt_to_dma(ep->ring->deq_seg, ep->ring->dequeue); + ep_ctx->deq = addr | ep->ring->cycle_state; +} + +/* Frees all stream contexts associated with the endpoint, + * + * Caller should fix the endpoint context streams fields. + */ +void xhci_free_stream_info(struct xhci_hcd *xhci, + struct xhci_stream_info *stream_info) +{ + int cur_stream; + struct xhci_ring *cur_ring; + dma_addr_t addr; + + if (!stream_info) + return; + + for (cur_stream = 1; cur_stream < stream_info->num_streams; + cur_stream++) { + cur_ring = stream_info->stream_rings[cur_stream]; + if (cur_ring) { + addr = cur_ring->first_seg->dma; + radix_tree_delete(&stream_info->trb_address_map, + addr >> SEGMENT_SHIFT); + xhci_ring_free(xhci, cur_ring); + stream_info->stream_rings[cur_stream] = NULL; + } + } + xhci_free_command(xhci, stream_info->free_streams_command); + xhci->cmd_ring_reserved_trbs--; + if (stream_info->stream_ctx_array) + xhci_free_stream_ctx(xhci, + stream_info->num_stream_ctxs, + stream_info->stream_ctx_array, + stream_info->ctx_array_dma); + + if (stream_info) + kfree(stream_info->stream_rings); + kfree(stream_info); +} + + +/***************** Device context manipulation *************************/ + static void xhci_init_endpoint_timer(struct xhci_hcd *xhci, struct xhci_virt_ep *ep) { @@ -328,9 +672,13 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) if (!dev) return; - for (i = 0; i < 31; ++i) + for (i = 0; i < 31; ++i) { if (dev->eps[i].ring) xhci_ring_free(xhci, dev->eps[i].ring); + if (dev->eps[i].stream_info) + xhci_free_stream_info(xhci, + dev->eps[i].stream_info); + } if (dev->ring_cache) { for (i = 0; i < dev->num_rings_cached; i++) @@ -655,6 +1003,9 @@ static inline u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, return max_packet * (max_burst + 1); } +/* Set up an endpoint with one ring segment. Do not allocate stream rings. + * Drivers will have to call usb_alloc_streams() to do that. + */ int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_device *udev, @@ -1003,6 +1354,16 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->device_pool = NULL; xhci_dbg(xhci, "Freed device context pool\n"); + if (xhci->small_streams_pool) + dma_pool_destroy(xhci->small_streams_pool); + xhci->small_streams_pool = NULL; + xhci_dbg(xhci, "Freed small stream array pool\n"); + + if (xhci->medium_streams_pool) + dma_pool_destroy(xhci->medium_streams_pool); + xhci->medium_streams_pool = NULL; + xhci_dbg(xhci, "Freed medium stream array pool\n"); + xhci_write_64(xhci, 0, &xhci->op_regs->dcbaa_ptr); if (xhci->dcbaa) pci_free_consistent(pdev, sizeof(*xhci->dcbaa), @@ -1239,6 +1600,22 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->segment_pool || !xhci->device_pool) goto fail; + /* Linear stream context arrays don't have any boundary restrictions, + * and only need to be 16-byte aligned. + */ + xhci->small_streams_pool = + dma_pool_create("xHCI 256 byte stream ctx arrays", + dev, SMALL_STREAM_ARRAY_SIZE, 16, 0); + xhci->medium_streams_pool = + dma_pool_create("xHCI 1KB stream ctx arrays", + dev, MEDIUM_STREAM_ARRAY_SIZE, 16, 0); + /* Any stream context array bigger than MEDIUM_STREAM_ARRAY_SIZE + * will be allocated with pci_alloc_consistent() + */ + + if (!xhci->small_streams_pool || !xhci->medium_streams_pool) + goto fail; + /* Set up the command ring to have one segments for now. */ xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags); if (!xhci->cmd_ring) |