From 17230acdc71137622ca7dfd789b3944c75d39404 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 19 Feb 2007 15:52:45 -0500 Subject: UHCI: Eliminate asynchronous skeleton Queue Headers This patch (as856) attempts to improve the performance of uhci-hcd by removing the asynchronous skeleton Queue Headers. They don't contain any useful information but the controller has to read through them at least once every millisecond, incurring a non-zero DMA overhead. Now all the asynchronous queues are combined, along with the period-1 interrupt queue, into a single list with a single skeleton QH. The start of the low-speed control, full-speed control, and bulk sublists is determined by linear search. Since there should rarely be more than a couple of QHs in the list, the searches should incur a much smaller total load than keeping the skeleton QHs. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-hcd.c | 52 ++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) (limited to 'drivers/usb/host/uhci-hcd.c') diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 1f0833ab294..44da4334f1d 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -13,7 +13,7 @@ * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com). * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c) - * (C) Copyright 2004-2006 Alan Stern, stern@rowland.harvard.edu + * (C) Copyright 2004-2007 Alan Stern, stern@rowland.harvard.edu * * Intel documents this fairly well, and as far as I know there * are no royalties or anything like that, but even so there are @@ -107,10 +107,10 @@ static __le32 uhci_frame_skel_link(struct uhci_hcd *uhci, int frame) * interrupt QHs, which will help spread out bandwidth utilization. * * ffs (Find First bit Set) does exactly what we need: - * 1,3,5,... => ffs = 0 => use skel_int2_qh = skelqh[8], - * 2,6,10,... => ffs = 1 => use skel_int4_qh = skelqh[7], etc. + * 1,3,5,... => ffs = 0 => use period-2 QH = skelqh[8], + * 2,6,10,... => ffs = 1 => use period-4 QH = skelqh[7], etc. * ffs >= 7 => not on any high-period queue, so use - * skel_int1_qh = skelqh[9]. + * period-1 QH = skelqh[9]. * Add in UHCI_NUMFRAMES to insure at least one bit is set. */ skelnum = 8 - (int) __ffs(frame | UHCI_NUMFRAMES); @@ -540,16 +540,18 @@ static void uhci_shutdown(struct pci_dev *pdev) * * The hardware doesn't really know any difference * in the queues, but the order does matter for the - * protocols higher up. The order is: + * protocols higher up. The order in which the queues + * are encountered by the hardware is: * - * - any isochronous events handled before any + * - All isochronous events are handled before any * of the queues. We don't do that here, because * we'll create the actual TD entries on demand. - * - The first queue is the interrupt queue. - * - The second queue is the control queue, split into low- and full-speed - * - The third queue is bulk queue. - * - The fourth queue is the bandwidth reclamation queue, which loops back - * to the full-speed control queue. + * - The first queue is the high-period interrupt queue. + * - The second queue is the period-1 interrupt and async + * (low-speed control, full-speed control, then bulk) queue. + * - The third queue is the terminating bandwidth reclamation queue, + * which contains no members, loops back to itself, and is present + * only when FSBR is on and there are no full-speed control or bulk QHs. */ static int uhci_start(struct usb_hcd *hcd) { @@ -626,30 +628,18 @@ static int uhci_start(struct usb_hcd *hcd) } /* - * 8 Interrupt queues; link all higher int queues to int1, - * then link int1 to control and control to bulk + * 8 Interrupt queues; link all higher int queues to int1 = async */ - uhci->skel_int128_qh->link = - uhci->skel_int64_qh->link = - uhci->skel_int32_qh->link = - uhci->skel_int16_qh->link = - uhci->skel_int8_qh->link = - uhci->skel_int4_qh->link = - uhci->skel_int2_qh->link = LINK_TO_QH( - uhci->skel_int1_qh); - - uhci->skel_int1_qh->link = LINK_TO_QH(uhci->skel_ls_control_qh); - uhci->skel_ls_control_qh->link = LINK_TO_QH(uhci->skel_fs_control_qh); - uhci->skel_fs_control_qh->link = LINK_TO_QH(uhci->skel_bulk_qh); - uhci->skel_bulk_qh->link = LINK_TO_QH(uhci->skel_term_qh); + for (i = SKEL_ISO + 1; i < SKEL_ASYNC; ++i) + uhci->skelqh[i]->link = LINK_TO_QH(uhci->skel_async_qh); + uhci->skel_async_qh->link = uhci->skel_term_qh->link = UHCI_PTR_TERM; /* This dummy TD is to work around a bug in Intel PIIX controllers */ uhci_fill_td(uhci->term_td, 0, uhci_explen(0) | - (0x7f << TD_TOKEN_DEVADDR_SHIFT) | USB_PID_IN, 0); - uhci->term_td->link = LINK_TO_TD(uhci->term_td); - - uhci->skel_term_qh->link = UHCI_PTR_TERM; - uhci->skel_term_qh->element = LINK_TO_TD(uhci->term_td); + (0x7f << TD_TOKEN_DEVADDR_SHIFT) | USB_PID_IN, 0); + uhci->term_td->link = UHCI_PTR_TERM; + uhci->skel_async_qh->element = uhci->skel_term_qh->element = + LINK_TO_TD(uhci->term_td); /* * Fill the frame list: make all entries point to the proper -- cgit v1.2.3-70-g09d2