Diffstat (limited to 'drivers/infiniband/hw/ipath')
37 files changed, 4350 insertions, 3103 deletions
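One of the larger behavioural changes in this diff is that completion-queue state moves into a structure (ipath_cq_wc) that can be mmap()ed by user processes, so the kernel can no longer trust the head and tail indices it reads back. Below is a minimal, self-contained C sketch of the clamping pattern the ipath_cq.c hunks adopt; the structure layout and helper names here are illustrative only, not the driver's actual definitions.

/*
 * Illustrative sketch (not driver source): the CQ ring, including its
 * head/tail indices, is shared with user space, so both indices must be
 * clamped to a sane range before the kernel uses them.
 */
struct cq_wc_sketch {
	unsigned int head;	/* next slot the kernel writes */
	unsigned int tail;	/* next slot the consumer reads */
	/* struct ib_wc queue[] follows in the real layout */
};

static unsigned int clamp_index(unsigned int idx, unsigned int cqe)
{
	/* a user process could have written any value here */
	return (idx > cqe) ? cqe : idx;
}

static int ring_full(const struct cq_wc_sketch *wc, unsigned int cqe)
{
	unsigned int head = clamp_index(wc->head, cqe);
	unsigned int next = (head >= cqe) ? 0 : head + 1;

	/* full when advancing head would collide with tail */
	return next == clamp_index(wc->tail, cqe);
}

The same bounded-index idea recurs in the ipath_poll_cq() and ipath_resize_cq() hunks further down, where tail is clamped before the ring is drained or copied into a resized queue.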
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 1db9489f1e8..574a678e7fd 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,16 +1,9 @@ -config IPATH_CORE - tristate "QLogic InfiniPath Driver" - depends on 64BIT && PCI_MSI && NET - ---help--- - This is a low-level driver for QLogic InfiniPath host channel - adapters (HCAs) based on the HT-400 and PE-800 chips. - config INFINIBAND_IPATH - tristate "QLogic InfiniPath Verbs Driver" - depends on IPATH_CORE && INFINIBAND + tristate "QLogic InfiniPath Driver" + depends on PCI_MSI && 64BIT && INFINIBAND ---help--- - This is a driver that provides InfiniBand verbs support for - QLogic InfiniPath host channel adapters (HCAs). This - allows these devices to be used with both kernel upper level - protocols such as IP-over-InfiniBand as well as with userspace - applications (in conjunction with InfiniBand userspace access). + This is a driver for QLogic InfiniPath host channel adapters, + including InfiniBand verbs support. This driver allows these + devices to be used with both kernel upper level protocols such + as IP-over-InfiniBand as well as with userspace applications + (in conjunction with InfiniBand userspace access). diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index b0bf7286413..5e29cb0095e 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,36 +1,35 @@ EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 -obj-$(CONFIG_IPATH_CORE) += ipath_core.o obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o -ipath_core-y := \ +ib_ipath-y := \ + ipath_cq.o \ ipath_diag.o \ ipath_driver.o \ ipath_eeprom.o \ ipath_file_ops.o \ ipath_fs.o \ - ipath_ht400.o \ + ipath_iba6110.o \ + ipath_iba6120.o \ ipath_init_chip.o \ ipath_intr.o \ - ipath_layer.o \ - ipath_pe800.o \ - ipath_stats.o \ - ipath_sysfs.o \ - ipath_user_pages.o - -ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o - -ib_ipath-y := \ - ipath_cq.o \ ipath_keys.o \ + ipath_layer.o \ ipath_mad.o \ + ipath_mmap.o \ ipath_mr.o \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ ipath_srq.o \ + ipath_stats.o \ + ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ - ipath_verbs.o \ - ipath_verbs_mcast.o + ipath_user_pages.o \ + ipath_verbs_mcast.o \ + ipath_verbs.o + +ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 062bd392e7e..54139d39818 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -106,9 +106,9 @@ struct infinipath_stats { __u64 sps_ether_spkts; /* number of "ethernet" packets received by driver */ __u64 sps_ether_rpkts; - /* number of SMA packets sent by driver */ + /* number of SMA packets sent by driver. Obsolete. */ __u64 sps_sma_spkts; - /* number of SMA packets received by driver */ + /* number of SMA packets received by driver. Obsolete. */ __u64 sps_sma_rpkts; /* number of times all ports rcvhdrq was full and packet dropped */ __u64 sps_hdrqfull; @@ -138,11 +138,12 @@ struct infinipath_stats { __u64 sps_pageunlocks; /* * Number of packets dropped in kernel other than errors (ether - * packets if ipath not configured, sma/mad, etc.) + * packets if ipath not configured, etc.) 
*/ __u64 sps_krdrops; + __u64 sps_txeparity; /* PIO buffer parity error, recovered */ /* pad for future growth */ - __u64 __sps_pad[46]; + __u64 __sps_pad[45]; }; /* @@ -153,8 +154,6 @@ struct infinipath_stats { #define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ /* Device has been disabled via admin request */ #define IPATH_STATUS_ADMIN_DISABLED 0x4 -#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */ -#define IPATH_STATUS_SMA 0x10 /* user SMA running */ /* Chip has been found and initted */ #define IPATH_STATUS_CHIP_PRESENT 0x20 /* IB link is at ACTIVE, usable for data traffic */ @@ -187,6 +186,9 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_PCIE 0x2 #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 +#define IPATH_RUNTIME_MASTER 0x10 +#define IPATH_RUNTIME_PBC_REWRITE 0x20 +#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40 /* * This structure is returned by ipath_userinit() immediately after @@ -204,7 +206,8 @@ struct ipath_base_info { /* version of software, for feature checking. */ __u32 spi_sw_version; /* InfiniPath port assigned, goes into sent packets */ - __u32 spi_port; + __u16 spi_port; + __u16 spi_subport; /* * IB MTU, packets IB data must be less than this. * The MTU is in bytes, and will be a multiple of 4 bytes. @@ -220,7 +223,7 @@ struct ipath_base_info { __u32 spi_tidcnt; /* size of the TID Eager list in infinipath, in entries */ __u32 spi_tidegrcnt; - /* size of a single receive header queue entry. */ + /* size of a single receive header queue entry in words. */ __u32 spi_rcvhdrent_size; /* * Count of receive header queue entries allocated. @@ -312,6 +315,12 @@ struct ipath_base_info { __u32 spi_filler_for_align; /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; + + /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + __u64 spi_subport_uregbase; + __u64 spi_subport_rcvegrbuf; + __u64 spi_subport_rcvhdr_base; + } __attribute__ ((aligned(8))); @@ -330,12 +339,12 @@ struct ipath_base_info { /* * Minor version differences are always compatible - * a within a major version, however if if user software is larger + * a within a major version, however if user software is larger * than driver software, some new features and/or structure fields * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference + * cares, or it must abort after initialization reports the difference. */ -#define IPATH_USER_SWMINOR 2 +#define IPATH_USER_SWMINOR 3 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -381,7 +390,16 @@ struct ipath_user_info { */ __u32 spu_rcvhdrsize; - __u64 spu_unused; /* kept for compatible layout */ + /* + * If two or more processes wish to share a port, each process + * must set the spu_subport_cnt and spu_subport_id to the same + * values. The only restriction on the spu_subport_id is that + * it be unique for a given node. 
+ */ + __u16 spu_subport_cnt; + __u16 spu_subport_id; + + __u32 spu_unused; /* kept for compatible layout */ /* * address of struct base_info to write to @@ -394,19 +412,25 @@ struct ipath_user_info { #define IPATH_CMD_MIN 16 -#define IPATH_CMD_USER_INIT 16 /* set up userspace */ +#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ #define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ #define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ +#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ +#define IPATH_CMD_USER_INIT 24 /* set up userspace */ -#define IPATH_CMD_MAX 21 +#define IPATH_CMD_MAX 24 struct ipath_port_info { __u32 num_active; /* number of active units */ __u32 unit; /* unit (chip) assigned to caller */ - __u32 port; /* port on unit assigned to caller */ + __u16 port; /* port on unit assigned to caller */ + __u16 subport; /* subport on unit assigned to caller */ + __u16 num_ports; /* number of ports available on unit */ + __u16 num_subports; /* number of subport slaves opened on port */ }; struct ipath_tid_info { @@ -437,6 +461,8 @@ struct ipath_cmd { __u32 recv_ctrl; /* partition key to set */ __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; } cmd; }; @@ -465,12 +491,11 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; -/* Passed into SMA special file's ->read and ->write methods. */ -struct ipath_sma_pkt -{ - __u32 unit; /* unit on which to send packet */ - __u64 data; /* address of payload in userspace */ - __u32 len; /* length of payload */ +/* Passed into diag data special file's ->write method. */ +struct ipath_diag_pkt { + __u32 unit; + __u64 data; + __u32 len; }; /* @@ -599,6 +624,10 @@ struct infinipath_counters { /* K_PktFlags bits */ #define INFINIPATH_KPF_INTR 0x1 +#define INFINIPATH_KPF_SUBPORT_MASK 0x3 +#define INFINIPATH_KPF_SUBPORT_SHIFT 1 + +#define INFINIPATH_MAX_SUBPORT 4 /* SendPIO per-buffer control */ #define INFINIPATH_SP_TEST 0x40 @@ -613,7 +642,7 @@ struct ipath_header { /* * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, - * Port 3, TID 11, offset 14. + * Port 4, TID 11, offset 13. */ __le32 ver_port_tid_offset; __le16 chksum; diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3efee341c9b..87462e0cb4d 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -42,20 +42,29 @@ * @entry: work completion entry to add * @sig: true if @entry is a solicitated entry * - * This may be called with one of the qp->s_lock or qp->r_rq.lock held. + * This may be called with qp->s_lock held. */ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { + struct ipath_cq_wc *wc; unsigned long flags; + u32 head; u32 next; spin_lock_irqsave(&cq->lock, flags); - if (cq->head == cq->ibcq.cqe) + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. 
+ */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; next = 0; - else - next = cq->head + 1; - if (unlikely(next == cq->tail)) { + } else + next = head + 1; + if (unlikely(next == wc->tail)) { spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; @@ -67,8 +76,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - cq->queue[cq->head] = *entry; - cq->head = next; + wc->queue[head] = *entry; + wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && solicited)) { @@ -101,20 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); + struct ipath_cq_wc *wc; unsigned long flags; int npolled; + u32 tail; spin_lock_irqsave(&cq->lock, flags); + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (cq->tail == cq->head) + if (tail == wc->head) break; - *entry = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + *entry = wc->queue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; else - cq->tail++; + tail++; } + wc->tail = tail; spin_unlock_irqrestore(&cq->lock, flags); @@ -160,38 +176,79 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, { struct ipath_ibdev *dev = to_idev(ibdev); struct ipath_cq *cq; - struct ib_wc *wc; + struct ipath_cq_wc *wc; struct ib_cq *ret; - if (entries > ib_ipath_max_cqes) { + if (entries < 1 || entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } - if (dev->n_cqs_allocated == ib_ipath_max_cqs) { + /* Allocate the completion queue structure. */ + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* - * Need to use vmalloc() if we want to support large #s of - * entries. + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. */ - cq = kmalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries); + if (!wc) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_cq; } /* - * Need to use vmalloc() if we want to support large #s of entries. + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. */ - wc = vmalloc(sizeof(*wc) * (entries + 1)); - if (!wc) { - kfree(cq); + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) wc; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wc; + } + + /* Allocate info for ipath_mmap(). 
*/ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + cq->ip = ip; + ip->context = context; + ip->obj = wc; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * entries); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + cq->ip = NULL; + + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_ipath_max_cqs) { + spin_unlock(&dev->n_cqs_lock); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_wc; } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -202,15 +259,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, cq->triggered = 0; spin_lock_init(&cq->lock); tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); - cq->head = 0; - cq->tail = 0; + wc->head = 0; + wc->tail = 0; cq->queue = wc; ret = &cq->ibcq; - dev->n_cqs_allocated++; + goto done; -bail: +bail_wc: + vfree(wc); + +bail_cq: + kfree(cq); + +done: return ret; } @@ -228,8 +291,13 @@ int ipath_destroy_cq(struct ib_cq *ibcq) struct ipath_cq *cq = to_icq(ibcq); tasklet_kill(&cq->comptask); + spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; - vfree(cq->queue); + spin_unlock(&dev->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, ipath_release_mmap_info); + else + vfree(cq->queue); kfree(cq); return 0; @@ -253,7 +321,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) spin_lock_irqsave(&cq->lock, flags); /* * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions. + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). */ if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = notify; @@ -261,49 +329,96 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) return 0; } +/** + * ipath_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct ipath_cq *cq = to_icq(ibcq); - struct ib_wc *wc, *old_wc; - u32 n; + struct ipath_cq_wc *old_wc; + struct ipath_cq_wc *wc; + u32 head, tail, n; int ret; + if (cqe < 1 || cqe > ib_ipath_max_cqes) { + ret = -EINVAL; + goto bail; + } + /* * Need to use vmalloc() if we want to support large #s of entries. */ - wc = vmalloc(sizeof(*wc) * (cqe + 1)); + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe); if (!wc) { ret = -ENOMEM; goto bail; } + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = (__u64) wc; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail; + } + spin_lock_irq(&cq->lock); - if (cq->head < cq->tail) - n = cq->ibcq.cqe + 1 + cq->head - cq->tail; + /* + * Make sure head and tail are sane since they + * might be user writable. 
+ */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; else - n = cq->head - cq->tail; + n = head - tail; if (unlikely((u32)cqe < n)) { spin_unlock_irq(&cq->lock); vfree(wc); ret = -EOVERFLOW; goto bail; } - for (n = 0; cq->tail != cq->head; n++) { - wc[n] = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + for (n = 0; tail != head; n++) { + wc->queue[n] = old_wc->queue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; else - cq->tail++; + tail++; } cq->ibcq.cqe = cqe; - cq->head = n; - cq->tail = 0; - old_wc = cq->queue; + wc->head = n; + wc->tail = 0; cq->queue = wc; spin_unlock_irq(&cq->lock); vfree(old_wc); + if (cq->ip) { + struct ipath_ibdev *dev = to_idev(ibcq->device); + struct ipath_mmap_info *ip = cq->ip; + + ip->obj = wc; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * cqe); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + ret = 0; bail: diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index f415beda0d3..df69f0d80b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -60,7 +60,6 @@ #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x8000 /* sma packet debug */ #define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ #define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ #define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ @@ -84,7 +83,6 @@ /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x0 #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */ #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ #define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ #define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 147dd89e21c..29958b6e021 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -41,11 +41,12 @@ * through the /sys/bus/pci resource mmap interface. 
*/ +#include <linux/io.h> #include <linux/pci.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" int ipath_diag_inuse; @@ -274,6 +275,158 @@ bail: return ret; } +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off); + +static struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diagpkt_write, +}; + +static struct cdev *diagpkt_cdev; +static struct class_device *diagpkt_class_dev; + +int __init ipath_diagpkt_add(void) +{ + return ipath_cdev_init(IPATH_DIAGPKT_MINOR, + "ipath_diagpkt", &diagpkt_file_ops, + &diagpkt_cdev, &diagpkt_class_dev); +} + +void __exit ipath_diagpkt_remove(void) +{ + ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev); +} + +/** + * ipath_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: ipath_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, clen, pbufn; + struct ipath_diag_pkt dp; + u32 *tmpbuf = NULL; + struct ipath_devdata *dd; + ssize_t ret = 0; + u64 val; + + if (count < sizeof(dp)) { + ret = -EINVAL; + goto bail; + } + + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + + clen = dp.len >> 2; + + dd = ipath_lookup(dp.unit); + if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || + !dd->ipath_kregbase) { + ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", + dp.unit); + ret = -ENODEV; + goto bail; + } + + if (ipath_diag_inuse && !diag_set_link && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + diag_set_link = 1; + ipath_cdbg(VERBOSE, "Trying to set to set link active for " + "diag pkt\n"); + ipath_set_linkstate(dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); + } + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. 
*/ + ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); + ret = -ENODEV; + goto bail; + } + val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; + if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && + val != IPATH_IBSTATE_ACTIVE) { + ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", + dd->ipath_unit, (unsigned long long) val); + ret = -EINVAL; + goto bail; + } + + /* need total length before first word written */ + /* +1 word is for the qword padding */ + plen = sizeof(u32) + dp.len; + + if ((plen + 4) > dd->ipath_ibmaxlen) { + ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", + plen - 4, dd->ipath_ibmaxlen); + ret = -EINVAL; + goto bail; /* before writing pbc */ + } + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " + "failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + piobuf = ipath_getpiobuf(dd, &pbufn); + if (!piobuf) { + ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", + dd->ipath_unit); + ret = -EBUSY; + goto bail; + } + + plen >>= 2; /* in dwords */ + + if (ipath_debug & __IPATH_PKTDBG) + ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", + dd->ipath_unit, plen - 1, pbufn); + + /* we have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order */ + writeq(plen, piobuf); + ipath_flush_wc(); + /* copy all by the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. */ + __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + ipath_flush_wc(); + __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + ipath_flush_wc(); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + static int ipath_diag_release(struct inode *in, struct file *fp) { mutex_lock(&ipath_mutex); diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index f98518d912b..12cefa658f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include "ipath_kernel.h" -#include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" static void ipath_update_pio_bufs(struct ipath_devdata *); @@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit) return iname; } -EXPORT_SYMBOL_GPL(ipath_get_unit_name); - #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " #define PFX IPATH_DRV_NAME ": " @@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name); * The size has to be longer than this string, so we can append * board/chip information to it in the init code. */ -const char ipath_core_version[] = IPATH_IDSTR "\n"; +const char ib_ipath_version[] = IPATH_IDSTR "\n"; static struct idr unit_table; DEFINE_SPINLOCK(ipath_devs_lock); LIST_HEAD(ipath_dev_list); -wait_queue_head_t ipath_sma_state_wait; +wait_queue_head_t ipath_state_wait; unsigned ipath_debug = __IPATH_INFO; @@ -97,16 +95,6 @@ const char *ipath_ibcstatus_str[] = { "RecovIdle", }; -/* - * These variables are initialized in the chip-specific files - * but are defined here. 
- */ -u16 ipath_gpio_sda_num, ipath_gpio_scl_num; -u64 ipath_gpio_sda, ipath_gpio_scl; -u64 infinipath_i_bitsextant; -ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; -u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; - static void __devexit ipath_remove_one(struct pci_dev *); static int __devinit ipath_init_one(struct pci_dev *, const struct pci_device_id *); @@ -403,10 +391,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { case PCI_DEVICE_ID_INFINIPATH_HT: - ipath_init_ht400_funcs(dd); + ipath_init_iba6110_funcs(dd); break; case PCI_DEVICE_ID_INFINIPATH_PE800: - ipath_init_pe800_funcs(dd); + ipath_init_iba6120_funcs(dd); break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -440,7 +428,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } dd->ipath_pcirev = rev; +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->ipath_kregbase = __ioremap(addr, len, + (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); +#else dd->ipath_kregbase = ioremap_nocache(addr, len); +#endif if (!dd->ipath_kregbase) { ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", @@ -503,7 +497,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipathfs_add_device(dd); ipath_user_add(dd); ipath_diag_add(dd); - ipath_layer_add(dd); + ipath_register_ib_device(dd); goto bail; @@ -523,28 +517,146 @@ bail: return ret; } +static void __devexit cleanup_device(struct ipath_devdata *dd) +{ + int port; + + ipath_shutdown_device(dd); + + if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { + /* can't do anything more with chip; needs re-init */ + *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; + if (dd->ipath_kregbase) { + /* + * if we haven't already cleaned up before these are + * to ensure any register reads/writes "fail" until + * re-init + */ + dd->ipath_kregbase = NULL; + dd->ipath_uregbase = 0; + dd->ipath_sregbase = 0; + dd->ipath_cregbase = 0; + dd->ipath_kregsize = 0; + } + ipath_disable_wc(dd); + } + + if (dd->ipath_pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->ipath_pioavailregs_dma, + dd->ipath_pioavailregs_phys); + dd->ipath_pioavailregs_dma = NULL; + } + if (dd->ipath_dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + dd->ipath_pd[0]->port_rcvhdrq_size, + dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); + dd->ipath_dummy_hdrq = NULL; + } + + if (dd->ipath_pageshadow) { + struct page **tmpp = dd->ipath_pageshadow; + dma_addr_t *tmpd = dd->ipath_physshadow; + int i, cnt = 0; + + ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " + "locked\n"); + for (port = 0; port < dd->ipath_cfgports; port++) { + int port_tidbase = port * dd->ipath_rcvtidcnt; + int maxtid = port_tidbase + dd->ipath_rcvtidcnt; + for (i = port_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + cnt++; + } + } + if (cnt) { + ipath_stats.sps_pageunlocks += cnt; + ipath_cdbg(VERBOSE, "There were still %u expTID " + "entries locked\n", cnt); + } + if (ipath_stats.sps_pagelocks || + ipath_stats.sps_pageunlocks) + ipath_cdbg(VERBOSE, "%llu pages locked, %llu " + "unlocked via ipath_m{un}lock\n", + (unsigned long long) + ipath_stats.sps_pagelocks, + (unsigned long long) + ipath_stats.sps_pageunlocks); + + ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", + 
dd->ipath_pageshadow); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + } + + /* + * free any resources still in use (usually just kernel ports) + * at unload; we do for portcnt, not cfgports, because cfgports + * could have changed while we were loaded. + */ + for (port = 0; port < dd->ipath_portcnt; port++) { + struct ipath_portdata *pd = dd->ipath_pd[port]; + dd->ipath_pd[port] = NULL; + ipath_free_pddata(dd, pd); + } + kfree(dd->ipath_pd); + /* + * debuggability, in case some cleanup path tries to use it + * after this + */ + dd->ipath_pd = NULL; +} + static void __devexit ipath_remove_one(struct pci_dev *pdev) { - struct ipath_devdata *dd; + struct ipath_devdata *dd = pci_get_drvdata(pdev); - ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev); - if (!pdev) - return; + ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + + if (dd->verbs_dev) + ipath_unregister_ib_device(dd->verbs_dev); - dd = pci_get_drvdata(pdev); - ipath_layer_remove(dd); ipath_diag_remove(dd); ipath_user_remove(dd); ipathfs_remove_device(dd); ipath_device_remove_group(&pdev->dev, dd); + ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " "unit %u\n", dd, (u32) dd->ipath_unit); - if (dd->ipath_kregbase) { - ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", - dd->ipath_kregbase); - iounmap((volatile void __iomem *) dd->ipath_kregbase); - dd->ipath_kregbase = NULL; - } + + cleanup_device(dd); + + /* + * turn off rcv, send, and interrupts for all ports, all drivers + * should also hard reset the chip here? + * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs + * for all versions of the driver, if they were allocated + */ + if (pdev->irq) { + ipath_cdbg(VERBOSE, + "unit %u free_irq of irq %x\n", + dd->ipath_unit, pdev->irq); + free_irq(pdev->irq, dd); + } else + ipath_dbg("irq is 0, not doing free_irq " + "for unit %u\n", dd->ipath_unit); + /* + * we check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. + */ + if (dd->ipath_f_cleanup) + /* clean up chip-specific stuff */ + dd->ipath_f_cleanup(dd); + + ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); + iounmap((volatile void __iomem *) dd->ipath_kregbase); pci_release_regions(pdev); ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); pci_disable_device(pdev); @@ -607,21 +719,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * * wait up to msecs milliseconds for IB link state change to occur for * now, take the easy polling route. Currently used only by - * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise + * ipath_set_linkstate. Returns 0 if state reached, otherwise * -ETIMEDOUT state can have multiple states set, for any of several * transitions. 
*/ -int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) +static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, + int msecs) { - dd->ipath_sma_state_wanted = state; - wait_event_interruptible_timeout(ipath_sma_state_wait, + dd->ipath_state_wanted = state; + wait_event_interruptible_timeout(ipath_state_wait, (dd->ipath_flags & state), msecs_to_jiffies(msecs)); - dd->ipath_sma_state_wanted = 0; + dd->ipath_state_wanted = 0; if (!(dd->ipath_flags & state)) { u64 val; - ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n", + ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" + " ms\n", /* test INIT ahead of DOWN, both can be set */ (state & IPATH_LINKINIT) ? "INIT" : ((state & IPATH_LINKDOWN) ? "DOWN" : @@ -754,8 +868,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len) static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, int err) { - return dd->ipath_port0_skbs ? - (void *)dd->ipath_port0_skbs[bufnum]->data : NULL; + return dd->ipath_port0_skbinfo ? + (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; } /** @@ -777,88 +891,39 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, */ /* - * We need 4 extra bytes for unaligned transfer copying + * We need 2 extra bytes for ipath_ether data sent in the + * key header. In order to keep everything dword aligned, + * we'll reserve 4 bytes. */ + len = dd->ipath_ibmaxlen + 4; + if (dd->ipath_flags & IPATH_4BYTE_TID) { - /* we need a 4KB multiple alignment, and there is no way + /* We need a 2KB multiple alignment, and there is no way * to do it except to allocate extra and then skb_reserve * enough to bring it up to the right alignment. */ - len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1; + len += 2047; } - else - len = dd->ipath_ibmaxlen + 4; + skb = __dev_alloc_skb(len, gfp_mask); if (!skb) { ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", len); goto bail; } + + skb_reserve(skb, 4); + if (dd->ipath_flags & IPATH_4BYTE_TID) { - u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4); + u32 una = (unsigned long)skb->data & 2047; if (una) - skb_reserve(skb, 4 + (1 << 11) - una); - else - skb_reserve(skb, 4); - } else - skb_reserve(skb, 4); + skb_reserve(skb, 2048 - una); + } bail: return skb; } -/** - * ipath_rcv_layer - receive a packet for the layered (ethernet) driver - * @dd: the infinipath device - * @etail: the sk_buff number - * @tlen: the total packet length - * @hdr: the ethernet header - * - * Separate routine for better overall optimization - */ -static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail, - u32 tlen, struct ether_header *hdr) -{ - u32 elen; - u8 pad, *bthbytes; - struct sk_buff *skb, *nskb; - - if (dd->ipath_port0_skbs && - hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) { - /* - * Allocate a new sk_buff to replace the one we give - * to the network stack. 
- */ - nskb = ipath_alloc_skb(dd, GFP_ATOMIC); - if (!nskb) { - /* count OK packets that we drop */ - ipath_stats.sps_krdrops++; - return; - } - - bthbytes = (u8 *) hdr->bth; - pad = (bthbytes[1] >> 4) & 3; - /* +CRC32 */ - elen = tlen - (sizeof(*hdr) + pad + sizeof(u32)); - - skb = dd->ipath_port0_skbs[etail]; - dd->ipath_port0_skbs[etail] = nskb; - skb_put(skb, elen); - - dd->ipath_f_put_tid(dd, etail + (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase - + dd->ipath_rcvegrbase), 0, - virt_to_phys(nskb->data)); - - __ipath_layer_rcv(dd, hdr, skb); - - /* another ether packet received */ - ipath_stats.sps_ether_rpkts++; - } - else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP) - __ipath_layer_rcv_lid(dd, hdr); -} - static void ipath_rcv_hdrerr(struct ipath_devdata *dd, u32 eflags, u32 l, @@ -972,26 +1037,17 @@ reloop: if (unlikely(eflags)) ipath_rcv_hdrerr(dd, eflags, l, etail, rc); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - int ret = __ipath_verbs_rcv(dd, rc + 1, - ebuf, tlen); - if (ret == -ENODEV) - ipath_cdbg(VERBOSE, - "received IB packet, " - "not SMA (QP=%x)\n", qp); - if (dd->ipath_lli_counter) - dd->ipath_lli_counter--; - - } else if (etype == RCVHQ_RCV_TYPE_EAGER) { - if (qp == IPATH_KD_QP && - bthbytes[0] == ipath_layer_rcv_opcode && - ebuf) - ipath_rcv_layer(dd, etail, tlen, - (struct ether_header *)hdr); - else - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, bthbytes[0], qp, tlen); + ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); } + else if (etype == RCVHQ_RCV_TYPE_EAGER) + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); else if (etype == RCVHQ_RCV_TYPE_EXPECTED) ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", be32_to_cpu(hdr->bth[0]) & 0xff); @@ -1024,7 +1080,8 @@ reloop: */ if (l == hdrqtail || (i && !(i&0xf))) { u64 lval; - if (l == hdrqtail) /* PE-800 interrupt only on last */ + if (l == hdrqtail) + /* request IBA6120 interrupt only on last */ lval = dd->ipath_rhdrhead_intr_off | l; else lval = l; @@ -1038,7 +1095,7 @@ reloop: } if (!dd->ipath_rhdrhead_intr_off && !reloop) { - /* HT-400 workaround; we can have a race clearing chip + /* IBA6110 workaround; we can have a race clearing chip * interrupt with another interrupt about to be delivered, * and can clear it before it is delivered on the GPIO * workaround. By doing the extra check here for the @@ -1211,7 +1268,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) * * do appropriate marking as busy, etc. * returns buffer number if one found (>=0), negative number is error. - * Used by ipath_sma_send_pkt and ipath_layer_send + * Used by ipath_layer_send */ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) { @@ -1317,13 +1374,6 @@ rescan: goto bail; } - if (updated) - /* - * ran out of bufs, now some (at least this one we just - * got) are now available, so tell the layered driver. - */ - __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - /* * set next starting place. 
Since it's just an optimization, * it doesn't matter who wins on this, so no locking @@ -1387,6 +1437,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, "for port %u rcvhdrqtailaddr failed\n", pd->port_port); ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; goto bail; } pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1408,12 +1461,13 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " "hdrtailaddr@%p %llx physical\n", pd->port_port, pd->port_rcvhdrq, - pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr, - (unsigned long long)pd->port_rcvhdrqtailaddr_phys); + (unsigned long long) pd->port_rcvhdrq_phys, + pd->port_rcvhdrtail_kvaddr, (unsigned long long) + pd->port_rcvhdrqtailaddr_phys); /* clear for security and sanity on each use */ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); /* * tell chip each time we init it, even if we are re-using previous @@ -1500,7 +1554,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) return ret; } -void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) +static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { [0] = "DOWN", @@ -1511,7 +1565,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & INFINIPATH_IBCC_LINKCMD_MASK; - ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " + ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " "is %s\n", dd->ipath_unit, what[linkcmd], ipath_ibcstatus_str[ @@ -1520,7 +1574,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); /* flush all queued sends when going to DOWN or INIT, to be sure that - * they don't block SMA and other MAD packets */ + * they don't block MAD packets */ if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, INFINIPATH_S_ABORT); @@ -1534,6 +1588,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) dd->ipath_ibcctrl | which); } +int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) +{ + u32 lstate; + int ret; + + switch (newstate) { + case IPATH_IB_LINKDOWN: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_SLEEP: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_DISABLE: + ipath_set_ib_lstate(dd, + INFINIPATH_IBCC_LINKINITCMD_DISABLE << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKINIT: + if (dd->ipath_flags & IPATH_LINKINIT) { + ret = 0; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKINIT; + break; + + case IPATH_IB_LINKARM: + if (dd->ipath_flags & IPATH_LINKARMED) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & + (IPATH_LINKINIT | IPATH_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << + INFINIPATH_IBCC_LINKCMD_SHIFT); + /* + * Since the port can transition to 
ACTIVE by receiving + * a non VL 15 packet, wait for either state. + */ + lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; + break; + + case IPATH_IB_LINKACTIVE: + if (dd->ipath_flags & IPATH_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & IPATH_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKACTIVE; + break; + + default: + ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); + ret = -EINVAL; + goto bail; + } + ret = ipath_wait_linkstate(dd, lstate, 2000); + +bail: + return ret; +} + +/** + * ipath_set_mtu - set the MTU + * @dd: the infinipath device + * @arg: the new MTU + * + * we can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link initialize (IPATH_IBSTATE_INIT) state... + */ +int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) +{ + u32 piosize; + int changed = 0; + int ret; + + /* + * mtu is IB data payload max. It's the largest power of 2 less + * than piosize (or even larger, since it only really controls the + * largest we can receive; we can send the max of the mtu and + * piosize). We check that it's one of the valid IB sizes. + */ + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + arg != 4096) { + ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); + ret = -EINVAL; + goto bail; + } + if (dd->ipath_ibmtu == arg) { + ret = 0; /* same as current */ + goto bail; + } + + piosize = dd->ipath_ibmaxlen; + dd->ipath_ibmtu = arg; + + if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != dd->ipath_init_ibmaxlen) { + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { + piosize = arg + IPATH_PIO_MAXIBHDR; + ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " + "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, + arg); + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + + if (changed) { + /* + * set the IBC maxpktlength to the size of our pio + * buffers in words + */ + u64 ibc = dd->ipath_ibcctrl; + ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << + INFINIPATH_IBCC_MAXPKTLEN_SHIFT); + + piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ + dd->ipath_ibmaxlen = piosize; + piosize /= sizeof(u32); /* in words */ + /* + * for ICRC, which we only send in diag test pkt mode, and + * we don't need to worry about that for mtu + */ + piosize += 1; + + ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + dd->ipath_ibcctrl = ibc; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + dd->ipath_f_tidtemplate(dd); + } + + ret = 0; + +bail: + return ret; +} + +int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) +{ + dd->ipath_lid = arg; + dd->ipath_lmc = lmc; + + return 0; +} + /** * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register * @dd: the infinipath device @@ -1637,13 +1865,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* - * we are shutting down, so tell the layered driver. We don't do - * this on just a link state change, much like ethernet, a cable - * unplug, etc. 
doesn't change driver state - */ - ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN); - /* disable IBC */ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, @@ -1699,7 +1920,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) pd->port_rcvhdrq = NULL; if (pd->port_rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)pd->port_rcvhdrtail_kvaddr, + pd->port_rcvhdrtail_kvaddr, pd->port_rcvhdrqtailaddr_phys); pd->port_rcvhdrtail_kvaddr = NULL; } @@ -1718,24 +1939,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) dma_free_coherent(&dd->pcidev->dev, size, base, pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; pd->port_rcvegrbuf_chunks = 0; - } else if (pd->port_port == 0 && dd->ipath_port0_skbs) { + } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { unsigned e; - struct sk_buff **skbs = dd->ipath_port0_skbs; + struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; - dd->ipath_port0_skbs = NULL; - ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs " - "@ %p\n", pd->port_port, skbs); + dd->ipath_port0_skbinfo = NULL; + ipath_cdbg(VERBOSE, "free closed port %d " + "ipath_port0_skbinfo @ %p\n", pd->port_port, + skbinfo); for (e = 0; e < dd->ipath_rcvegrcnt; e++) - if (skbs[e]) - dev_kfree_skb(skbs[e]); - vfree(skbs); + if (skbinfo[e].skb) { + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_ibmaxlen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(skbinfo[e].skb); + } + vfree(skbinfo); } kfree(pd->port_tid_pg_list); + vfree(pd->subport_uregbase); + vfree(pd->subport_rcvegrbuf); + vfree(pd->subport_rcvhdr_base); kfree(pd); } @@ -1743,7 +1972,7 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version); + ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with @@ -1776,8 +2005,18 @@ static int __init infinipath_init(void) goto bail_group; } + ret = ipath_diagpkt_add(); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " + "diag data device: error %d\n", -ret); + goto bail_ipathfs; + } + goto bail; +bail_ipathfs: + ipath_exit_ipathfs(); + bail_group: ipath_driver_remove_group(&ipath_driver.driver); @@ -1791,148 +2030,12 @@ bail: return ret; } -static void cleanup_device(struct ipath_devdata *dd) -{ - int port; - - ipath_shutdown_device(dd); - - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { - /* can't do anything more with chip; needs re-init */ - *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; - if (dd->ipath_kregbase) { - /* - * if we haven't already cleaned up before these are - * to ensure any register reads/writes "fail" until - * re-init - */ - dd->ipath_kregbase = NULL; - dd->ipath_uregbase = 0; - dd->ipath_sregbase = 0; - dd->ipath_cregbase = 0; - dd->ipath_kregsize = 0; - } - ipath_disable_wc(dd); - } - - if (dd->ipath_pioavailregs_dma) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *) dd->ipath_pioavailregs_dma, - dd->ipath_pioavailregs_phys); - dd->ipath_pioavailregs_dma = NULL; - } - if (dd->ipath_dummy_hdrq) { - dma_free_coherent(&dd->pcidev->dev, - dd->ipath_pd[0]->port_rcvhdrq_size, - dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); - dd->ipath_dummy_hdrq = NULL; - } - - if (dd->ipath_pageshadow) { - struct page 
**tmpp = dd->ipath_pageshadow; - int i, cnt = 0; - - ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " - "locked\n"); - for (port = 0; port < dd->ipath_cfgports; port++) { - int port_tidbase = port * dd->ipath_rcvtidcnt; - int maxtid = port_tidbase + dd->ipath_rcvtidcnt; - for (i = port_tidbase; i < maxtid; i++) { - if (!tmpp[i]) - continue; - ipath_release_user_pages(&tmpp[i], 1); - tmpp[i] = NULL; - cnt++; - } - } - if (cnt) { - ipath_stats.sps_pageunlocks += cnt; - ipath_cdbg(VERBOSE, "There were still %u expTID " - "entries locked\n", cnt); - } - if (ipath_stats.sps_pagelocks || - ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu " - "unlocked via ipath_m{un}lock\n", - (unsigned long long) - ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); - - ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", - dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); - dd->ipath_pageshadow = NULL; - } - - /* - * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. - */ - for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; - ipath_free_pddata(dd, pd); - } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; -} - static void __exit infinipath_cleanup(void) { - struct ipath_devdata *dd, *tmp; - unsigned long flags; - ipath_exit_ipathfs(); ipath_driver_remove_group(&ipath_driver.driver); - spin_lock_irqsave(&ipath_devs_lock, flags); - - /* - * turn off rcv, send, and interrupts for all ports, all drivers - * should also hard reset the chip here? - * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs - * for all versions of the driver, if they were allocated - */ - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - if (dd->ipath_kregbase) - cleanup_device(dd); - - if (dd->pcidev) { - if (dd->pcidev->irq) { - ipath_cdbg(VERBOSE, - "unit %u free_irq of irq %x\n", - dd->ipath_unit, dd->pcidev->irq); - free_irq(dd->pcidev->irq, dd); - } else - ipath_dbg("irq is 0, not doing free_irq " - "for unit %u\n", dd->ipath_unit); - - /* - * we check for NULL here, because it's outside - * the kregbase check, and we need to call it - * after the free_irq. Thus it's possible that - * the function pointers were never initialized. 
- */ - if (dd->ipath_f_cleanup) - /* clean up chip-specific stuff */ - dd->ipath_f_cleanup(dd); - - dd->pcidev = NULL; - } - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); pci_unregister_driver(&ipath_driver); @@ -1998,5 +2101,22 @@ bail: return ret; } +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) +{ + u64 val; + if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) { + return -1; + } + if ( dd->ipath_rx_pol_inv != new_pol_inv ) { + dd->ipath_rx_pol_inv = new_pol_inv; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= ((u64)dd->ipath_rx_pol_inv) << + INFINIPATH_XGXS_RX_POL_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + } + return 0; +} module_init(infinipath_init); module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 3313356ab93..a4019a6b756 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd, gpioval = &dd->ipath_gpio_out; read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; if (new_line_state == i2c_line_high) /* tri-state the output rather than force high */ @@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd, write_val = 0x0UL; if (line == i2c_line_scl) { - write_val <<= ipath_gpio_scl_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num); + write_val <<= dd->ipath_gpio_scl_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num); *gpioval |= write_val; } else { - write_val <<= ipath_gpio_sda_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num); + write_val <<= dd->ipath_gpio_sda_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num); *gpioval |= write_val; } ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); @@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd, read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); /* config line to be an input */ if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; write_val = read_val & ~mask; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); @@ -187,6 +187,7 @@ bail: static void i2c_wait_for_writes(struct ipath_devdata *dd) { (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + rmb(); } static void scl_out(struct ipath_devdata *dd, u8 bit) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index bbaa70e57db..a9ddc6911f6 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,9 +39,14 @@ #include <asm/pgtable.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" +/* + * mmap64 doesn't allow all 64 bits for 32-bit applications + * so only use the low 43 bits. 
+ */ +#define MMAP64_MASK 0x7FFFFFFFFFFUL + static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -58,18 +63,35 @@ static struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; -static int ipath_get_base_info(struct ipath_portdata *pd, +static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { + struct ipath_portdata *pd = port_fp(fp); int ret = 0; struct ipath_base_info *kinfo = NULL; struct ipath_devdata *dd = pd->port_dd; + unsigned subport_cnt; + int shared, master; + size_t sz; + + subport_cnt = pd->port_subport_cnt; + if (!subport_cnt) { + shared = 0; + master = 0; + subport_cnt = 1; + } else { + shared = 1; + master = !subport_fp(fp); + } - if (ubase_size < sizeof(*kinfo)) { + sz = sizeof(*kinfo); + /* If port sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 3 * sizeof(u64); + if (ubase_size < sz) { ipath_cdbg(PROC, - "Base size %lu, need %lu (version mismatch?)\n", - (unsigned long) ubase_size, - (unsigned long) sizeof(*kinfo)); + "Base size %zu, need %zu (version mismatch?)\n", + ubase_size, sz); ret = -EINVAL; goto bail; } @@ -96,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd, kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt; + kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; + if (master) + kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; /* * for this use, may be ipath_cfgports summed over all chips that * are are configured and present @@ -119,31 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd, * page_address() macro worked, but in 2.6.11, even that returns the * full 64 bit address (upper bits all 1's). So far, using the * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will chang that, and we'll be - * on to yet another method of dealing with this + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. 
*/ kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = - dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + if (!shared) { + kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piobufbase = (u64) pd->port_piobufs; + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else if (master) { + kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + + (dd->ipath_pbufsport % subport_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * + (dd->ipath_pbufsport - kinfo->spi_piocnt); + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else { + unsigned slave = subport_fp(fp) - 1; + + kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * kinfo->spi_piocnt * slave; + kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + + PAGE_SIZE * slave) & MMAP64_MASK; - kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1); - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * slave) & MMAP64_MASK; + kinfo->spi_rcvhdr_tailaddr = + (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; + kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + + dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & + MMAP64_MASK; + } + + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / + dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; kinfo->spi_piosize = dd->ipath_ibmaxlen; kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ kinfo->spi_port = pd->port_port; + kinfo->spi_subport = subport_fp(fp); kinfo->spi_sw_version = IPATH_KERN_SWVERSION; kinfo->spi_hw_version = dd->ipath_revision; + if (master) { + kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; + kinfo->spi_subport_uregbase = + (u64) pd->subport_uregbase & MMAP64_MASK; + kinfo->spi_subport_rcvegrbuf = + (u64) pd->subport_rcvegrbuf & MMAP64_MASK; + kinfo->spi_subport_rcvhdr_base = + (u64) pd->subport_rcvhdr_base & MMAP64_MASK; + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); + } + if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) ret = -EFAULT; @@ -155,6 +223,7 @@ bail: /** * ipath_tid_update - update a port TID * @pd: the port + * @fp: the ipath device file * @ti: the TID information * * The new implementation as of Oct 2004 is that the driver assigns @@ -177,11 +246,11 @@ bail: * virtually contiguous pages, that should change to improve * performance. 
*/ -static int ipath_tid_update(struct ipath_portdata *pd, +static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, const struct ipath_tid_info *ti) { int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt; + u32 tid, porttid, cnt, i, tidcnt, tidoff; u16 *tidlist; struct ipath_devdata *dd = pd->port_dd; u64 physaddr; @@ -189,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd, u64 __iomem *tidbase; unsigned long tidmap[8]; struct page **pagep = NULL; + unsigned subport = subport_fp(fp); if (!dd->ipath_pageshadow) { ret = -ENOMEM; @@ -205,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -EFAULT; goto done; } - tidcnt = dd->ipath_rcvtidcnt; - if (cnt >= tidcnt) { + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) { + tidcnt = dd->ipath_rcvtidcnt; + tid = pd->port_tidcursor; + tidoff = 0; + } else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + tidoff = dd->ipath_rcvtidcnt - tidcnt; + porttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + tidoff = tidcnt * (subport - 1); + porttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { /* make sure it all fits in port_tid_pg_list */ dev_info(&dd->pcidev->dev, "Process tried to allocate %u " "TIDs, only trying max (%u)\n", cnt, tidcnt); cnt = tidcnt; } - pagep = (struct page **)pd->port_tid_pg_list; - tidlist = (u16 *) (&pagep[cnt]); + pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; + tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; memset(tidmap, 0, sizeof(tidmap)); - tid = pd->port_tidcursor; /* before decrement; chip actual # */ - porttid = pd->port_port * tidcnt; ntids = tidcnt; tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + dd->ipath_rcvtidbase + @@ -275,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -ENOMEM; break; } - tidlist[i] = tid; + tidlist[i] = tid + tidoff; ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid, vaddr); + "vaddr %lx\n", i, tid + tidoff, vaddr); /* we "know" system pages and TID pages are same size */ dd->ipath_pageshadow[porttid + tid] = pagep[i]; + dd->ipath_physshadow[porttid + tid] = ipath_map_page( + dd->pcidev, pagep[i], 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); /* * don't need atomic or it's overhead */ __set_bit(tid, tidmap); - physaddr = page_to_phys(pagep[i]); + physaddr = dd->ipath_physshadow[porttid + tid]; ipath_stats.sps_pagelocks++; ipath_cdbg(VERBOSE, "TID %u, vaddr %lx, physaddr %llx pgp %p\n", @@ -318,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd, tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); dd->ipath_pageshadow[porttid + tid] = NULL; ipath_stats.sps_pageunlocks++; } @@ -342,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd, } if (tid == tidcnt) tid = 0; - pd->port_tidcursor = tid; + if (!pd->port_subport_cnt) + pd->port_tidcursor = tid; + else + tidcursor_fp(fp) = tid; } done: @@ -355,6 +448,7 @@ done: /** * ipath_tid_free - free a port TID * @pd: the port + * @subport: the subport * @ti: the TID info * * right now we are unlocking one page at a time, but since @@ -368,7 +462,7 @@ done: * they pass in to us. 
*/ -static int ipath_tid_free(struct ipath_portdata *pd, +static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, const struct ipath_tid_info *ti) { int ret = 0; @@ -389,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd, } porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) + tidcnt = dd->ipath_rcvtidcnt; + else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + porttid += dd->ipath_rcvtidcnt - tidcnt; + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + porttid += tidcnt * (subport - 1); + } tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvtidbase + porttid * sizeof(*tidbase)); - tidcnt = dd->ipath_rcvtidcnt; limit = sizeof(tidmap) * BITS_PER_BYTE; if (limit > tidcnt) /* just in case size changes in future */ @@ -418,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd, pd->port_pid, tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages( &dd->ipath_pageshadow[porttid + tid], 1); dd->ipath_pageshadow[porttid + tid] = NULL; @@ -582,20 +688,24 @@ bail: /** * ipath_manage_rcvq - manage a port's receive queue * @pd: the port + * @subport: the subport * @start_stop: action to carry out * * start_stop == 0 disables receive on the port, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) +static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, + int start_stop) { struct ipath_devdata *dd = pd->port_dd; u64 tval; - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n", + ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port); + pd->port_port, subport); + if (subport) + goto bail; /* atomically clear receive enable port. */ if (start_stop) { /* @@ -610,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) * updated and correct itself, even in the face of software * bugs. */ - *pd->port_rcvhdrtail_kvaddr = 0; + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, &dd->ipath_rcvctrl); } else @@ -631,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); } /* always; new head should be equal to new tail; see above */ +bail: return 0; } @@ -688,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, } } +/* + * Initialize the port data with the receive buffer sizes + * so this can be done while the master port is locked. + * Otherwise, there is a race with a slave opening the port + * and seeing these fields uninitialized. + */ +static void init_user_egr_sizes(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned egrperchunk, egrcnt, size; + + /* + * to avoid wasting a lot of memory, we allocate 32KB chunks of + * physically contiguous memory, advance through it until used up + * and then allocate more. Of course, we need memory to store those + * extra pointers, now. 
Started out with 256KB, but under heavy + * memory pressure (creating large files and then copying them over + * NFS while doing lots of MPI jobs), we hit some allocation + * failures, even though we can sleep... (2.6.10) Still get + * failures at 64K. 32K is the lowest we can go without wasting + * additional memory. + */ + size = 0x8000; + egrperchunk = size / dd->ipath_rcvegrbufsize; + egrcnt = dd->ipath_rcvegrcnt; + pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; + pd->port_rcvegrbufs_perchunk = egrperchunk; + pd->port_rcvegrbuf_size = size; +} + /** * ipath_create_user_egr - allocate eager TID buffers * @pd: the port to allocate TID buffers for @@ -703,7 +844,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, static int ipath_create_user_egr(struct ipath_portdata *pd) { struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; int ret; gfp_t gfp_flags; @@ -723,31 +864,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. - */ - size = 0x8000; - alloced = ALIGN(egrsize * egrcnt, size); - egrperchunk = size / egrsize; - chunk = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbuf_chunks = chunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; - pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0])); + chunk = pd->port_rcvegrbuf_chunks; + egrperchunk = pd->port_rcvegrbufs_perchunk; + size = pd->port_rcvegrbuf_size; + pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf) { ret = -ENOMEM; goto bail; } pd->port_rcvegrbuf_phys = - vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0])); + kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf_phys) { ret = -ENOMEM; goto bail_rcvegrbuf; @@ -792,105 +920,23 @@ bail_rcvegrbuf_phys: pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; bail_rcvegrbuf: - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; bail: return ret; } -static int ipath_do_user_init(struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - struct ipath_devdata *dd = pd->port_dd; - u32 head32; - - /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { - dev_info(&dd->pcidev->dev, - "User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", uinfo->spu_userversion & 0xffff, - IPATH_USER_SWMINOR); - - if 
(uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* for right now, kernel piobufs are at end, so port 1 is at 0 */ - pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. - */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - /* - * now enable the port; the tail registers will be written to memory - * by the chip as soon as it sees the write to - * dd->ipath_kregs->kr_rcvctrl. The update only happens on - * transition from 0 to 1, so clear it first, then set it as part of - * enabling the port. This will (very briefly) affect any other - * open ports, but it shouldn't be long enough to be an issue. - * We explictly set the in-memory copy to 0 beforehand, so we don't - * have to wait to be sure the DMA update has happened. 
- */ - *pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); -done: - return ret; -} - /* common code for the mappings on dma_alloc_coherent mem */ static int ipath_mmap_mem(struct vm_area_struct *vma, - struct ipath_portdata *pd, unsigned len, - int write_ok, dma_addr_t addr, char *what) + struct ipath_portdata *pd, unsigned len, int write_ok, + void *kvaddr, char *what) { struct ipath_devdata *dd = pd->port_dd; - unsigned pfn = (unsigned long)addr >> PAGE_SHIFT; + unsigned long pfn; int ret; if ((vma->vm_end - vma->vm_start) > len) { @@ -913,17 +959,17 @@ static int ipath_mmap_mem(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYWRITE; } + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - dev_info(&dd->pcidev->dev, - "%s port%u mmap of %lx, %x bytes r%c failed: %d\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o', ret); + dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " + "bytes r%c failed: %d\n", what, pd->port_port, + pfn, len, write_ok?'w':'o', ret); else - ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o'); + ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " + "r%c\n", what, pd->port_port, pfn, len, + write_ok?'w':'o'); bail: return ret; } @@ -958,7 +1004,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, static int mmap_piobufs(struct vm_area_struct *vma, struct ipath_devdata *dd, - struct ipath_portdata *pd) + struct ipath_portdata *pd, + unsigned piobufs, unsigned piocnt) { unsigned long phys; int ret; @@ -969,31 +1016,32 @@ static int mmap_piobufs(struct vm_area_struct *vma, * process data, and catches users who might try to read the i/o * space due to a bug. */ - if ((vma->vm_end - vma->vm_start) > - (dd->ipath_pbufsport * dd->ipath_palign)) { + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " "reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; + ret = -EINVAL; goto bail; } - phys = dd->ipath_physaddr + pd->port_piobufs; + phys = dd->ipath_physaddr + piobufs; /* * Don't mark this as non-cached, or we don't get the * write combining behavior we want on the PIO buffers! 
*/ - if (vma->vm_flags & VM_READ) { - dev_info(&dd->pcidev->dev, - "Can't map piobufs as readable (flags=%lx)\n", - vma->vm_flags); - ret = -EPERM; - goto bail; - } +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif - /* don't allow them to later change to readable with mprotect */ + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; @@ -1010,7 +1058,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, struct ipath_devdata *dd = pd->port_dd; unsigned long start, size; size_t total_size, i; - dma_addr_t *phys; + unsigned long pfn; int ret; size = pd->port_rcvegrbuf_size; @@ -1020,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, "reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); - ret = -EFAULT; + ret = -EINVAL; goto bail; } @@ -1034,11 +1082,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYWRITE; start = vma->vm_start; - phys = pd->port_rcvegrbuf_phys; for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { - ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, - size, vma->vm_page_prot); + pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); if (ret < 0) goto bail; } @@ -1048,6 +1096,122 @@ bail: return ret; } +/* + * ipath_file_vma_nopage - handle a VMA page fault. + */ +static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *page = NOPAGE_SIGBUS; + void *pageptr; + + /* + * Convert the vmalloc address into a struct page. + */ + pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); + page = vmalloc_to_page(pageptr); + if (!page) + goto out; + + /* Increment the reference count. */ + get_page(page); + if (type) + *type = VM_FAULT_MINOR; +out: + return page; +} + +static struct vm_operations_struct ipath_file_vm_ops = { + .nopage = ipath_file_vma_nopage, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct ipath_portdata *pd, unsigned subport) +{ + unsigned long len; + struct ipath_devdata *dd; + void *addr; + size_t size; + int ret; + + /* If the port is not shared, all addresses should be physical */ + if (!pd->port_subport_cnt) { + ret = -EINVAL; + goto bail; + } + + dd = pd->port_dd; + size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + + /* + * Master has all the slave uregbase, rcvhdrq, and + * rcvegrbufs mmapped. 
+ */ + if (subport == 0) { + unsigned num_slaves = pd->port_subport_cnt - 1; + + if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & + MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf; + size *= num_slaves; + } else { + ret = -EINVAL; + goto bail; + } + } else if (pgaddr == (((u64) pd->subport_uregbase + + PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); + size = PAGE_SIZE; + } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1)) & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1); + size = pd->port_rcvhdrq_size; + } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + + size * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf + size * (subport - 1); + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else { + ret = -EINVAL; + goto bail; + } + len = vma->vm_end - vma->vm_start; + if (len > size) { + ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &ipath_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 0; + +bail: + return ret; +} + /** * ipath_mmap - mmap various structures into user space * @fp: the file pointer @@ -1063,73 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) struct ipath_portdata *pd; struct ipath_devdata *dd; u64 pgaddr, ureg; + unsigned piobufs, piocnt; int ret; pd = port_fp(fp); + if (!pd) { + ret = -EINVAL; + goto bail; + } dd = pd->port_dd; /* * This is the ipath_do_user_init() code, mapping the shared buffers * into the user process. The address referred to by vm_pgoff is the - * virtual, not physical, address; we only do one mmap for each - * space mapped. + * file offset passed via mmap(). For shared ports, this is the + * kernel vmalloc() address of the pages to share with the master. + * For non-shared or master ports, this is a physical address. + * We only do one mmap for each space mapped. */ pgaddr = vma->vm_pgoff << PAGE_SHIFT; /* - * Must fit in 40 bits for our hardware; some checked elsewhere, - * but we'll be paranoid. Check for 0 is mostly in case one of the - * allocations failed, but user called mmap anyway. We want to catch - * that before it can match. + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. 
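A hedged sketch of the address-cookie convention this mmap_kvaddr() matching relies on: kernel vmalloc addresses handed out in the base info are masked with MMAP64_MASK (the low 43 bits), which keeps them distinct from real chip or memory offsets that must fit in 40 bits, and the same masking is reapplied here to recognise which shared region the caller asked for. The helper name below is invented for illustration.

/* illustrative helper, not part of the driver */
static int example_cookie_matches(u64 pgaddr, const void *kvaddr,
				  size_t offset)
{
	return pgaddr == (((u64) (unsigned long) kvaddr + offset) &
			  MMAP64_MASK);
}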
*/ - if (!pgaddr || pgaddr >= (1ULL<<40)) { - ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n", - (unsigned long long)pgaddr, vma->vm_start, vma->vm_end); - return -EINVAL; + if (!pgaddr) { + ret = -EINVAL; + goto bail; } - /* just the offset of the port user registers, not physical addr */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; - - ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n", + ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start); + vma->vm_end - vma->vm_start, dd->ipath_unit, + pd->port_port, subport_fp(fp)); - if (vma->vm_start & (PAGE_SIZE-1)) { - ipath_dev_err(dd, - "vm_start not aligned: %lx, end=%lx phys %lx\n", - vma->vm_start, vma->vm_end, (unsigned long)pgaddr); - ret = -EINVAL; + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. + */ + if (pgaddr >= (1ULL<<40)) { + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + goto bail; + } + + if (!pd->port_subport_cnt) { + /* port is not shared */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = dd->ipath_pbufsport; + piobufs = pd->port_piobufs; + } else if (!subport_fp(fp)) { + /* caller is the master */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + + (dd->ipath_pbufsport % pd->port_subport_cnt); + piobufs = pd->port_piobufs + + dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + /* caller is a slave */ + ureg = 0; + piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } - else if (pgaddr == ureg) + + if (pgaddr == ureg) ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == pd->port_piobufs) - ret = mmap_piobufs(vma, dd, pd); - else if (pgaddr == (u64) pd->port_rcvegr_phys) + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); + else if (pgaddr == dd->ipath_pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + (void *) dd->ipath_pioavailregs_dma, + "pioavail registers"); + else if (subport_fp(fp)) + /* Subports don't mmap the physical receive buffers */ + ret = -EINVAL; + else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { + else if (pgaddr == (u64) pd->port_rcvhdrq_phys) /* - * The rcvhdrq itself; readonly except on HT-400 (so have + * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. 
*/ - unsigned total_size = - ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize - * sizeof(u32), PAGE_SIZE); - ret = ipath_mmap_mem(vma, pd, total_size, 1, - pd->port_rcvhdrq_phys, + ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, + pd->port_rcvhdrq, "rcvhdrq"); - } - else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys) + else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) /* in-memory copy of rcvhdrq tail register */ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - pd->port_rcvhdrqtailaddr_phys, + pd->port_rcvhdrtail_kvaddr, "rcvhdrq tail"); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - dd->ipath_pioavailregs_phys, - "pioavail registers"); else ret = -EINVAL; @@ -1137,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) if (ret < 0) dev_info(&dd->pcidev->dev, - "Failure %d on addr %lx, off %lx\n", - -ret, vma->vm_start, vma->vm_pgoff); - + "Failure %d on off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: return ret; } @@ -1149,9 +1340,12 @@ static unsigned int ipath_poll(struct file *fp, struct ipath_portdata *pd; u32 head, tail; int bit; + unsigned pollflag = 0; struct ipath_devdata *dd; pd = port_fp(fp); + if (!pd) + goto bail; dd = pd->port_dd; bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; @@ -1174,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp, if (tail == head) { set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | head, pd->port_port); @@ -1185,9 +1379,12 @@ static unsigned int ipath_poll(struct file *fp, clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); pd->port_rcvwait_to++; } + else + pollflag = POLLIN | POLLRDNORM; } else { /* it's already happened; don't do wait_event overhead */ + pollflag = POLLIN | POLLRDNORM; pd->port_rcvnowait++; } @@ -1195,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); - return 0; +bail: + return pollflag; +} + +static int init_subports(struct ipath_devdata *dd, + struct ipath_portdata *pd, + const struct ipath_user_info *uinfo) +{ + int ret = 0; + unsigned num_slaves; + size_t size; + + /* Old user binaries don't know about subports */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) + goto bail; + /* + * If the user is requesting zero or one port, + * skip the subport allocation. + */ + if (uinfo->spu_subport_cnt <= 1) + goto bail; + if (uinfo->spu_subport_cnt > 4) { + ret = -EINVAL; + goto bail; + } + + num_slaves = uinfo->spu_subport_cnt - 1; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + if (!pd->subport_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: pd->port_rcvhdrq_size isn't initialized yet. 
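For orientation, a sketch of the shared-memory footprint a master port sets aside for its slaves in init_subports() (the allocations appear just above and below): one ureg page, one receive header queue and one full set of eager-buffer chunks per slave. The function itself does not exist in the driver.

/* illustrative only; parameters mirror the dd/pd fields used nearby */
static size_t example_subport_mem(unsigned rcvhdrcnt, unsigned rcvhdrentsize,
				  unsigned egr_chunks, size_t egr_chunk_size,
				  unsigned num_slaves)
{
	size_t rcvhdrq = ALIGN(rcvhdrcnt * rcvhdrentsize * sizeof(u32),
			       PAGE_SIZE);

	return (PAGE_SIZE + rcvhdrq + egr_chunks * egr_chunk_size) *
	       num_slaves;
}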
*/ + size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_slaves; + pd->subport_rcvhdr_base = vmalloc(size); + if (!pd->subport_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_slaves); + if (!pd->subport_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + pd->port_subport_cnt = uinfo->spu_subport_cnt; + pd->port_subport_id = uinfo->spu_subport_id; + pd->active_slaves = 1; + goto bail; + +bail_rhdr: + vfree(pd->subport_rcvhdr_base); +bail_ureg: + vfree(pd->subport_uregbase); + pd->subport_uregbase = NULL; +bail: + return ret; } static int try_alloc_port(struct ipath_devdata *dd, int port, - struct file *fp) + struct file *fp, + const struct ipath_user_info *uinfo) { + struct ipath_portdata *pd; int ret; - if (!dd->ipath_pd[port]) { - void *p, *ptmp; + if (!(pd = dd->ipath_pd[port])) { + void *ptmp; - p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); + pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); /* * Allocate memory for use in ipath_tid_update() just once @@ -1216,34 +1475,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + dd->ipath_rcvtidcnt * sizeof(struct page **), GFP_KERNEL); - if (!p || !ptmp) { + if (!pd || !ptmp) { ipath_dev_err(dd, "Unable to allocate portdata " "memory, failing open\n"); ret = -ENOMEM; - kfree(p); + kfree(pd); kfree(ptmp); goto bail; } - dd->ipath_pd[port] = p; + dd->ipath_pd[port] = pd; dd->ipath_pd[port]->port_port = port; dd->ipath_pd[port]->port_dd = dd; dd->ipath_pd[port]->port_tid_pg_list = ptmp; init_waitqueue_head(&dd->ipath_pd[port]->port_wait); } - if (!dd->ipath_pd[port]->port_cnt) { - dd->ipath_pd[port]->port_cnt = 1; - fp->private_data = (void *) dd->ipath_pd[port]; + if (!pd->port_cnt) { + pd->userversion = uinfo->spu_userversion; + init_user_egr_sizes(pd); + if ((ret = init_subports(dd, pd, uinfo)) != 0) + goto bail; ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", current->comm, current->pid, dd->ipath_unit, port); - dd->ipath_pd[port]->port_pid = current->pid; - strncpy(dd->ipath_pd[port]->port_comm, current->comm, - sizeof(dd->ipath_pd[port]->port_comm)); + pd->port_cnt = 1; + port_fp(fp) = pd; + pd->port_pid = current->pid; + strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; - goto bail; - } - ret = -EBUSY; + } else + ret = -EBUSY; bail: return ret; @@ -1259,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd) | IPATH_LINKUNK)); } -static int find_free_port(int unit, struct file *fp) +static int find_free_port(int unit, struct file *fp, + const struct ipath_user_info *uinfo) { struct ipath_devdata *dd = ipath_lookup(unit); int ret, i; @@ -1274,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp) goto bail; } - for (i = 0; i < dd->ipath_cfgports; i++) { - ret = try_alloc_port(dd, i, fp); + for (i = 1; i < dd->ipath_cfgports; i++) { + ret = try_alloc_port(dd, i, fp, uinfo); if (ret != -EBUSY) goto bail; } @@ -1285,26 +1547,27 @@ bail: return ret; } -static int find_best_unit(struct file *fp) +static int find_best_unit(struct file *fp, + const struct ipath_user_info *uinfo) { int ret = 0, i, prefunit = -1, devmax; int maxofallports, npresent, nup; int ndev; - (void) ipath_count_units(&npresent, &nup, &maxofallports); + devmax = ipath_count_units(&npresent, &nup, &maxofallports); /* * This code is present to allow a knowledgeable person to * 
specify the layout of processes to processors before opening * this driver, and then we'll assign the process to the "closest" - * HT-400 to that processor (we assume reasonable connectivity, + * InfiniPath chip to that processor (we assume reasonable connectivity, * for now). This code assumes that if affinity has been set * before this point, that at most one cpu is set; for now this * is reasonable. I check for both cpus_empty() and cpus_full(), * in case some kernel variant sets none of the bits when no * affinity is set. 2.6.11 and 12 kernels have all present * cpus set. Some day we'll have to fix it up further to handle - * a cpu subset. This algorithm fails for two HT-400's connected + * a cpu subset. This algorithm fails for two HT chips connected * in tunnel fashion. Eventually this needs real topology * information. There may be some issues with dual core numbering * as well. This needs more work prior to release. @@ -1338,8 +1601,6 @@ static int find_best_unit(struct file *fp) if (prefunit != -1) devmax = prefunit + 1; - else - devmax = ipath_count_units(NULL, NULL, NULL); recheck: for (i = 1; i < maxofallports; i++) { for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; @@ -1354,7 +1615,7 @@ recheck: * next. */ continue; - ret = try_alloc_port(dd, i, fp); + ret = try_alloc_port(dd, i, fp, uinfo); if (!ret) goto done; } @@ -1390,22 +1651,183 @@ done: return ret; } +static int find_shared_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = ipath_count_units(NULL, NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct ipath_devdata *dd = ipath_lookup(ndev); + + if (!dd) + continue; + for (i = 1; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + /* Skip ports which are not yet open */ + if (!pd || !pd->port_cnt) + continue; + /* Skip port if it doesn't match the requested one */ + if (pd->port_subport_id != uinfo->spu_subport_id) + continue; + /* Verify the sharing process matches the master */ + if (pd->port_subport_cnt != uinfo->spu_subport_cnt || + pd->userversion != uinfo->spu_userversion || + pd->port_cnt >= pd->port_subport_cnt) { + ret = -EINVAL; + goto done; + } + port_fp(fp) = pd; + subport_fp(fp) = pd->port_cnt++; + tidcursor_fp(fp) = 0; + pd->active_slaves |= 1 << subport_fp(fp); + ipath_cdbg(PROC, + "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", + current->comm, current->pid, + subport_fp(fp), + pd->port_comm, pd->port_pid, + dd->ipath_unit, pd->port_port); + ret = 1; + goto done; + } + } + +done: + return ret; +} + static int ipath_open(struct inode *in, struct file *fp) { - int ret, user_minor; + /* The real work is performed later in ipath_assign_port() */ + fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); + return fp->private_data ? 
0 : -ENOMEM; +} + + +/* Get port early, so can set affinity prior to memory allocation */ +static int ipath_assign_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + int i_minor; + unsigned swminor; + + /* Check to be sure we haven't already initialized this file */ + if (port_fp(fp)) { + ret = -EINVAL; + goto done; + } + + /* for now, if major version is different, bail */ + if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { + ipath_dbg("User major version %d not same as driver " + "major %d\n", uinfo->spu_userversion >> 16, + IPATH_USER_SWMAJOR); + ret = -ENODEV; + goto done; + } + + swminor = uinfo->spu_userversion & 0xffff; + if (swminor != IPATH_USER_SWMINOR) + ipath_dbg("User minor version %d not same as driver " + "minor %d\n", swminor, IPATH_USER_SWMINOR); mutex_lock(&ipath_mutex); - user_minor = iminor(in) - IPATH_USER_MINOR_BASE; + if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && + (ret = find_shared_port(fp, uinfo))) { + mutex_unlock(&ipath_mutex); + if (ret > 0) + ret = 0; + goto done; + } + + i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)in->i_rdev, user_minor); + (long)fp->f_dentry->d_inode->i_rdev, i_minor); - if (user_minor) - ret = find_free_port(user_minor - 1, fp); + if (i_minor) + ret = find_free_port(i_minor - 1, fp, uinfo); else - ret = find_best_unit(fp); + ret = find_best_unit(fp, uinfo); mutex_unlock(&ipath_mutex); + +done: + return ret; +} + + +static int ipath_do_user_init(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + struct ipath_portdata *pd; + struct ipath_devdata *dd; + u32 head32; + + pd = port_fp(fp); + dd = pd->port_dd; + + if (uinfo->spu_rcvhdrsize) { + ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); + if (ret) + goto done; + } + + /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + pd->port_piobufs = dd->ipath_piobufbase + + dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; + ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", + pd->port_port, pd->port_piobufs); + + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If pd->port_port > chip-supported, + * we need to do extra stuff here to handle by handling overflow + * through port 0, someday + */ + ret = ipath_create_rcvhdrq(dd, pd); + if (!ret) + ret = ipath_create_user_egr(pd); + if (ret) + goto done; + + /* + * set the eager head register for this port to the current values + * of the tail pointers, since we don't know if they were + * updated on last use of the port. + */ + head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); + ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); + dd->ipath_lastegrheads[pd->port_port] = -1; + dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; + ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", + pd->port_port, head32); + pd->port_tidcursor = 0; /* start at beginning after open */ + /* + * now enable the port; the tail registers will be written to memory + * by the chip as soon as it sees the write to + * dd->ipath_kregs->kr_rcvctrl. The update only happens on + * transition from 0 to 1, so clear it first, then set it as part of + * enabling the port. This will (very briefly) affect any other + * open ports, but it shouldn't be long enough to be an issue. 
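The admission test that find_shared_port() applies above can be restated as a single predicate (the helper is illustrative, not part of the driver): a process may attach as a slave only if it presents the same subport id, subport count and user version as the master, and a slave slot is still free.

/* illustrative only */
static int example_can_share(const struct ipath_portdata *pd,
			     const struct ipath_user_info *uinfo)
{
	return pd->port_subport_id == uinfo->spu_subport_id &&
	       pd->port_subport_cnt == uinfo->spu_subport_cnt &&
	       pd->userversion == uinfo->spu_userversion &&
	       pd->port_cnt < pd->port_subport_cnt;
}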
+ * We explictly set the in-memory copy to 0 beforehand, so we don't + * have to wait to be sure the DMA update has happened. + */ + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; + set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + &dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); +done: return ret; } @@ -1428,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd) if (!dd->ipath_pageshadow[i]) continue; + pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i], 1); dd->ipath_pageshadow[i] = NULL; @@ -1448,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd) static int ipath_close(struct inode *in, struct file *fp) { int ret = 0; + struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; unsigned port; @@ -1457,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_lock(&ipath_mutex); - pd = port_fp(fp); - port = pd->port_port; + fd = (struct ipath_filedata *) fp->private_data; fp->private_data = NULL; + pd = fd->pd; + if (!pd) { + mutex_unlock(&ipath_mutex); + goto bail; + } + if (--pd->port_cnt) { + /* + * XXX If the master closes the port before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + pd->active_slaves &= ~(1 << fd->subport); + mutex_unlock(&ipath_mutex); + goto bail; + } + port = pd->port_port; dd = pd->port_dd; if (pd->port_hdrqfull) { @@ -1498,8 +1938,6 @@ static int ipath_close(struct inode *in, struct file *fp) /* clean up the pkeys for this port user */ ipath_clean_part_key(pd, dd); - - /* * be paranoid, and never write 0's to these, just use an * unused part of the port 0 tail page. Of course, @@ -1518,39 +1956,49 @@ static int ipath_close(struct inode *in, struct file *fp) i = dd->ipath_pbufsport * (port - 1); ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); + dd->ipath_f_clear_tids(dd, pd->port_port); + if (dd->ipath_pageshadow) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", pd->port_comm, pd->port_pid, dd->ipath_unit, port); - - dd->ipath_f_clear_tids(dd, pd->port_port); } - pd->port_cnt = 0; pd->port_pid = 0; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ +bail: + kfree(fd); return ret; } -static int ipath_port_info(struct ipath_portdata *pd, +static int ipath_port_info(struct ipath_portdata *pd, u16 subport, struct ipath_port_info __user *uinfo) { struct ipath_port_info info; int nup; int ret; + size_t sz; (void) ipath_count_units(NULL, &nup, NULL); info.num_active = nup; info.unit = pd->port_dd->ipath_unit; info.port = pd->port_port; + info.subport = subport; + /* Don't return new fields if old library opened the port. */ + if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { + /* Number of user ports available for this device. 
*/ + info.num_ports = pd->port_dd->ipath_cfgports - 1; + info.num_subports = pd->port_subport_cnt; + sz = sizeof(info); + } else + sz = sizeof(info) - 2 * sizeof(u16); - if (copy_to_user(uinfo, &info, sizeof(info))) { + if (copy_to_user(uinfo, &info, sz)) { ret = -EFAULT; goto bail; } @@ -1560,6 +2008,16 @@ bail: return ret; } +static int ipath_get_slave_info(struct ipath_portdata *pd, + void __user *slave_mask_addr) +{ + int ret = 0; + + if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) + ret = -EFAULT; + return ret; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -1586,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, consumed = sizeof(cmd.type); switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + case __IPATH_CMD_USER_INIT: case IPATH_CMD_USER_INIT: copy = sizeof(cmd.cmd.user_info); dest = &cmd.cmd.user_info; @@ -1612,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; + case IPATH_CMD_SLAVE_INFO: + copy = sizeof(cmd.cmd.slave_mask_addr); + dest = &cmd.cmd.slave_mask_addr; + src = &ucmd->cmd.slave_mask_addr; + break; default: ret = -EINVAL; goto bail; @@ -1629,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, consumed += copy; pd = port_fp(fp); + if (!pd && cmd.type != __IPATH_CMD_USER_INIT && + cmd.type != IPATH_CMD_ASSIGN_PORT) { + ret = -EINVAL; + goto bail; + } switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + case __IPATH_CMD_USER_INIT: + /* backwards compatibility, get port first */ + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + /* and fall through to current version. 
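The reply sizing used by ipath_port_info() above keeps old user libraries working; a minimal restatement (illustrative only) is: a library whose minor version matches the driver's gets the two new u16 fields, anything older gets the original, shorter structure so its copy_to_user() length still lines up.

/* illustrative only */
static size_t example_port_info_size(u32 userversion)
{
	return (userversion & 0xffff) == IPATH_USER_SWMINOR ?
	       sizeof(struct ipath_port_info) :
	       sizeof(struct ipath_port_info) - 2 * sizeof(u16);
}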
*/ case IPATH_CMD_USER_INIT: - ret = ipath_do_user_init(pd, &cmd.cmd.user_info); - if (ret < 0) + ret = ipath_do_user_init(fp, &cmd.cmd.user_info); + if (ret) goto bail; ret = ipath_get_base_info( - pd, (void __user *) (unsigned long) + fp, (void __user *) (unsigned long) cmd.cmd.user_info.spu_base_info, cmd.cmd.user_info.spu_base_info_size); break; case IPATH_CMD_RECV_CTRL: - ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl); + ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); break; case IPATH_CMD_PORT_INFO: - ret = ipath_port_info(pd, + ret = ipath_port_info(pd, subport_fp(fp), (struct ipath_port_info __user *) (unsigned long) cmd.cmd.port_info); break; case IPATH_CMD_TID_UPDATE: - ret = ipath_tid_update(pd, &cmd.cmd.tid_info); + ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); break; case IPATH_CMD_TID_FREE: - ret = ipath_tid_free(pd, &cmd.cmd.tid_info); + ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); break; case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; + case IPATH_CMD_SLAVE_INFO: + ret = ipath_get_slave_info(pd, + (void __user *) (unsigned long) + cmd.cmd.slave_mask_addr); + break; } if (ret >= 0) @@ -1815,7 +2301,7 @@ int ipath_user_add(struct ipath_devdata *dd) if (ret < 0) { ipath_dev_err(dd, "Could not create wildcard " "minor: error %d\n", -ret); - goto bail_sma; + goto bail_user; } atomic_set(&user_setup, 1); @@ -1831,7 +2317,7 @@ int ipath_user_add(struct ipath_devdata *dd) goto bail; -bail_sma: +bail_user: user_cleanup(); bail: return ret; @@ -1853,4 +2339,3 @@ void ipath_user_remove(struct ipath_devdata *dd) bail: return; } - diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 0936d8e8d70..d9ff283f725 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -61,14 +61,13 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; - inode->i_nlink++; - dir->i_nlink++; + inc_nlink(inode); + inc_nlink(dir); } inode->i_fop = fops; @@ -119,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -139,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -178,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; /* so we only initialize non-zero fields. */ memset(portinfo, 0, sizeof portinfo); @@ -191,8 +190,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, portinfo[4] = (dd->ipath_lid << 16); /* - * Notimpl yet SMLID (should we store this in the driver, in case - * SMA dies?) CapabilityMask is 0, we don't support any of these + * Notimpl yet SMLID. 
+ * CapabilityMask is 0, we don't support any of these * DiagCode is 0; we don't store any diag info for now Notimpl yet * M_KeyLeasePeriod (we don't support M_Key) */ @@ -325,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -357,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if ( pos < 0) { + if (pos != 0) { ret = -EINVAL; goto bail; } - if (pos >= sizeof(struct ipath_flash)) { - ret = 0; + if (count != sizeof(struct ipath_flash)) { + ret = -EINVAL; goto bail; } - if (count > sizeof(struct ipath_flash) - pos) - count = sizeof(struct ipath_flash) - pos; - tmp = kmalloc(count, GFP_KERNEL); if (!tmp) { ret = -ENOMEM; @@ -381,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 3db015da6e7..9e4e8d4c6e2 100644 --- a/drivers/infiniband/hw/ipath/ipath_ht400.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -33,7 +33,7 @@ /* * This file contains all of the code that is specific to the InfiniPath - * HT-400 chip. + * HT chip. */ #include <linux/pci.h> @@ -43,7 +43,7 @@ #include "ipath_registers.h" /* - * This lists the InfiniPath HT400 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. * * The names are in InterCap form because they're taken straight from @@ -252,8 +252,8 @@ static const struct ipath_cregs ipath_ht_cregs = { }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1FF -#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF +#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 @@ -338,7 +338,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, if (crcbits) { u16 ctrl0, ctrl1; snprintf(bitsmsg, sizeof bitsmsg, - "[HT%s lane %s CRC (%llx); ignore till reload]", + "[HT%s lane %s CRC (%llx); powercycle to completely clear]", !(crcbits & _IPATH_HTLINK1_CRCBITS) ? "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) ? "1 (B)" : "0+1 (A+B)"), @@ -389,17 +389,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, _IPATH_HTLINK1_CRCBITS))); } +/* 6110 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), + INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), + INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), + INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** - * ipath_ht_handle_hwerrors - display hardware errors + * ipath_ht_handle_hwerrors - display hardware errors. 
* @dd: the infinipath device * @msg: the output buffer * @msgl: the size of the output buffer * - * Use same msg buffer as regular errors to avoid - * excessive stack use. Most hardware errors are catastrophic, but for - * right now, we'll print them and continue. - * We reuse the same message buffer as ipath_handle_errors() to avoid - * excessive stack usage. + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * ipath_handle_errors() to avoid excessive stack usage. */ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, size_t msgl) @@ -440,19 +451,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring; only @@ -461,8 +502,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * times. 
*/ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; @@ -498,46 +540,18 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR) - strlcat(msg, "[HTC Ireq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR) - strlcat(msg, "[HTC Treq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR) - strlcat(msg, "[HTC Tresp Parity]", msgl); + + ipath_format_hwerrors(hwerrs, + ipath_6110_hwerror_msgs, + sizeof(ipath_6110_hwerror_msgs) / + sizeof(ipath_6110_hwerror_msgs[0]), + msg, msgl); if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) hwerr_crcbits(dd, hwerrs, msg, msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR5) - strlcat(msg, "[HT core Misc5]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR6) - strlcat(msg, "[HT core Misc6]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) - strlcat(msg, "[HT core Misc7]", msgl); if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, HT-400 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; @@ -553,7 +567,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), HT-400 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -572,11 +586,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* @@ -610,18 +619,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, break; case 5: /* - * HT-460 original production board; two production levels, with + * original production board; two production levels, with * different serial number ranges. See ipath_ht_early_init() for * case where we enable IPATH_GPIO_INTR for later serial # range. 
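The per-bit strlcat() blocks removed above are replaced by a call to ipath_format_hwerrors() driven by the ipath_6110_hwerror_msgs table. The real helper lives elsewhere in the driver and its struct field names are assumed here; this is only the presumed shape of a table-driven formatter.

/* presumed shape, for illustration only */
static void example_format_hwerrors(u64 hwerrs,
				    const struct ipath_hwerror_msgs *msgs,
				    size_t nmsgs, char *msg, size_t msgl)
{
	size_t i;

	for (i = 0; i < nmsgs; i++)
		if (hwerrs & msgs[i].mask)
			strlcat(msg, msgs[i].msg, msgl);
}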
*/ - n = "InfiniPath_HT-460"; + n = "InfiniPath_QHT7040"; break; case 6: n = "OEM_Board_3"; break; case 7: - /* HT-460 small form factor production board */ - n = "InfiniPath_HT-465"; + /* small form factor production board */ + n = "InfiniPath_QHT7140"; break; case 8: n = "LS/X-1"; @@ -633,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, n = "OEM_Board_2"; break; case 11: - n = "InfiniPath_HT-470"; + n = "InfiniPath_HT-470"; /* obsoleted */ break; case 12: n = "OEM_Board_4"; @@ -641,7 +650,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, default: /* don't know, just print the number */ ipath_dev_err(dd, "Don't yet know about board " "with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", boardrev); break; } @@ -650,11 +659,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { /* - * This version of the driver only supports the HT-400 - * Rev 3.2 + * This version of the driver only supports Rev 3.2 and 3.3 */ ipath_dev_err(dd, - "Unsupported HT-400 revision %u.%u!\n", + "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; goto bail; @@ -738,11 +746,10 @@ static void ipath_check_htlink(struct ipath_devdata *dd) static int ipath_setup_ht_reset(struct ipath_devdata *dd) { - ipath_dbg("No reset possible for HT-400\n"); + ipath_dbg("No reset possible for this InfiniPath hardware\n"); return 0; } -#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */ #define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ #define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ @@ -925,7 +932,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, /* * kernels with CONFIG_PCI_MSI set the vector in the irq field of - * struct pci_device, so we use that to program the HT-400 internal + * struct pci_device, so we use that to program the internal * interrupt register (not config space) with that value. The BIOS * must still have done the basic MSI setup. */ @@ -973,7 +980,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, * do this early, before we ever enable errors or hardware errors, * mostly to avoid causing the chip to enter freeze mode. */ - pos = pci_find_capability(pdev, HT_CAPABILITY_ID); + pos = pci_find_capability(pdev, PCI_CAP_ID_HT); if (!pos) { ipath_dev_err(dd, "Couldn't find HyperTransport " "capability; no interrupts\n"); @@ -996,7 +1003,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, else if (cap_type == HT_INTR_DISC_CONFIG) ihandler = set_int_handler(dd, pdev, pos); } while ((pos = pci_find_next_capability(pdev, pos, - HT_CAPABILITY_ID))); + PCI_CAP_ID_HT))); if (!ihandler) { ipath_dev_err(dd, "Couldn't find interrupt handler in " @@ -1013,7 +1020,7 @@ bail: * @dd: the infinipath device * * Called during driver unload. 
- * This is currently a nop for the HT-400, not for all chips + * This is currently a nop for the HT chip, not for all chips */ static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) { @@ -1081,21 +1088,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); } -static void ipath_init_ht_variables(void) +static void ipath_init_ht_variables(struct ipath_devdata *dd) { - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | @@ -1113,7 +1120,7 @@ static void ipath_init_ht_variables(void) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << @@ -1142,8 +1149,8 @@ static void ipath_init_ht_variables(void) INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /** @@ -1290,6 +1297,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) val &= ~INFINIPATH_XGXS_RESET; change = 1; } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } if (change) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); @@ -1470,7 +1486,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; /* - * For HT-400, we allocate a somewhat overly large eager buffer, + * For HT, we allocate a somewhat overly large eager buffer, * such that we can guarantee that we can receive the largest * packet that we can send out. To truly support a 4KB MTU, * we need to bump this to a large value. To date, other than @@ -1531,7 +1547,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* - * Later production HT-460 has same changes as HT-465, so + * Later production QHT7040 has same changes as QHT7140, so * can use GPIO interrupts. They have serial #'s starting * with 128, rather than 112. 
*/ @@ -1560,13 +1576,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) } /** - * ipath_init_ht400_funcs - set up the chip-specific function pointers + * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_ht400_funcs(struct ipath_devdata *dd) +void ipath_init_iba6110_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_ht_intconfig; dd->ipath_f_bus = ipath_setup_ht_config; @@ -1599,5 +1615,5 @@ void ipath_init_ht400_funcs(struct ipath_devdata *dd) * do very early init that is needed before ipath_f_bus is * called */ - ipath_init_ht_variables(); + ipath_init_ht_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index b83f66d8262..a72ab9de386 100644 --- a/drivers/infiniband/hw/ipath/ipath_pe800.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -32,7 +32,7 @@ */ /* * This file contains all of the code that is specific to the - * InfiniPath PE-800 chip. + * InfiniPath PCIe chip. */ #include <linux/interrupt.h> @@ -45,9 +45,9 @@ /* * This file contains all the chip-specific register information and - * access functions for the QLogic InfiniPath PE800, the PCI-Express chip. + * access functions for the QLogic InfiniPath PCI-Express chip. * - * This lists the InfiniPath PE800 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. */ struct _infinipath_do_not_use_kernel_regs { @@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), - /* This group is pe-800-specific; and used only in this file */ /* The rcvpktled register controls one of the debug port signals, so * a packet activity LED can be connected to it. */ .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), @@ -264,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = { }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1F -#define INFINIPATH_I_RCVAVAIL_MASK 0x1F +#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL @@ -295,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +/* + * Rev2 silicon allows suppressing check for ArmLaunch errors. + * this can speed up short packet sends on systems that do + * not guaranteee write-order. + */ +#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63) + +/* 6120 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), + INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"), + /* + * In practice, it's unlikely wthat we'll see PCIe PLL, or bus + * parity or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. 
+ */ + INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"), + INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -344,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. 
+ */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring only make the @@ -364,8 +420,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * and we get here multiple times */ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } /* @@ -379,16 +436,15 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, } else { ipath_dbg("Clearing freezemode on ignored hardware " "error\n"); - ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - ctrl); + dd->ipath_control); } } *msg = '\0'; if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, PE-800 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ *dd->ipath_statusp |= IPATH_STATUS_HWERROR; @@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, dd->ipath_hwerrmask); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } + + ipath_format_hwerrors(hwerrs, + ipath_6120_hwerror_msgs, + sizeof(ipath_6120_hwerror_msgs)/ + sizeof(ipath_6120_hwerror_msgs[0]), + msg, msgl); + if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { bits = (u32) ((hwerrs >> @@ -423,17 +468,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, "[PCIe Mem Parity Errs %x] ", bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ INFINIPATH_HWE_COREPLL_RFSLIP ) if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), PE-800 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) hwerrs & _IPATH_PLL_FAIL); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -452,34 +493,6 @@ static void 
ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP) - strlcat(msg, "[PCIe Poisoned TLP]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT) - strlcat(msg, "[PCIe completion timeout]", msgl); - - /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus - * parity or memory parity error failures, because most likely we - * won't be able to talk to the core of the chip. Nonetheless, we - * might see them, if they are in parts of the PCIe core that aren't - * essential. - */ - if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED) - strlcat(msg, "[PCIePLL1]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED) - strlcat(msg, "[PCIePLL0]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH) - strlcat(msg, "[PCIe XTLH core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM) - strlcat(msg, "[PCIe ADM TX core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM) - strlcat(msg, "[PCIe ADM RX core parity]", msgl); - - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* @@ -511,22 +524,28 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, n = "InfiniPath_Emulation"; break; case 1: - n = "InfiniPath_PE-800-Bringup"; + n = "InfiniPath_QLE7140-Bringup"; break; case 2: - n = "InfiniPath_PE-880"; + n = "InfiniPath_QLE7140"; break; case 3: - n = "InfiniPath_PE-850"; + n = "InfiniPath_QMI7140"; break; case 4: - n = "InfiniPath_PE-860"; + n = "InfiniPath_QEM7140"; + break; + case 5: + n = "InfiniPath_QMH7140"; + break; + case 6: + n = "InfiniPath_QLE7142"; break; default: ipath_dev_err(dd, "Don't yet know about board with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u", boardrev); break; } @@ -534,7 +553,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, snprintf(name, namelen, "%s", n); if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) { - ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n", + ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; } else @@ -568,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) if (!dd->ipath_boardrev) // no PLL for Emulator val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - /* workaround bug 9460 in internal interface bus parity checking */ - val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; - + if (dd->ipath_minrev < 2) { + /* workaround bug 9460 in internal interface bus parity + * checking. Fixed (HW bug 9490) in Rev2. 
+ */ + val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; + } dd->ipath_hwerrmask = val; } @@ -580,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) */ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) { - u64 val, tmp, config1; - int ret = 0, change = 0; + u64 val, tmp, config1, prev_val; + int ret = 0; ipath_dbg("Trying to bringup serdes\n"); @@ -638,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + prev_val = val; if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { val &= @@ -645,13 +668,30 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_XGXS_MDIOADDR_SHIFT); /* MDIO address 3 */ val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - change = 1; } if (val & INFINIPATH_XGXS_RESET) { val &= ~INFINIPATH_XGXS_RESET; - change = 1; } - if (change) + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + } + if (dd->ipath_minrev >= 2) { + /* Rev 2. can tolerate multiple writes to PBC, and + * allowing them can provide lower latency on some + * CPUs, but this feature is off by default, only + * turned on by setting D63 of XGXSconfig reg. + * May want to make this conditional more + * fine-grained in future. This is not exactly + * related to XGXS, but where the bit ended up. + */ + val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR; + } + if (val != prev_val) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); @@ -705,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -/* this is not yet needed on the PE800, so just return 0. */ static int ipath_pe_intconfig(struct ipath_devdata *dd) { + u64 val; + u32 chiprev; + + /* + * If the chip supports added error indication via GPIO pins, + * enable interrupts on those bits so the interrupt routine + * can count the events. Also set flag so interrupt routine + * can know they are expected. + */ + chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT; + if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { + /* Rev2+ reports extra errors via internal GPIO pins */ + dd->ipath_flags |= IPATH_GPIO_ERRINTRS; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= IPATH_GPIO_ERRINTR_MASK; + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + } return 0; } @@ -759,8 +815,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, * * This is called during driver unload. * We do the pci_disable_msi here, not in generic code, because it - * isn't used for the HT-400. If we do end up needing pci_enable_msi - * at some point in the future for HT-400, we'll move the call back + * isn't used for the HT chips. If we do end up needing pci_enable_msi + * at some point in the future for HT, we'll move the call back * into the main init_one code. */ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) @@ -780,10 +836,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) * late in 2.6.16). * All that can be done is to edit the kernel source to remove the quirk * check until that is fixed. 
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400), - * even those it uses MSI, and we want to avoid the quirk warning, so - * So we call enable_msi only for the PE-800. If we do end up needing - * pci_enable_msi at some point in the future for HT-400, we'll move the + * We do not need to call enable_msi() for our HyperTransport chip, + * even though it uses MSI, and we want to avoid the quirk warning, so + * So we call enable_msi only for PCIe. If we do end up needing + * pci_enable_msi at some point in the future for HT, we'll move the * call back into the main init_one code. * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that @@ -841,21 +897,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, return 0; } -static void ipath_init_pe_variables(void) +static void ipath_init_pe_variables(struct ipath_devdata *dd) { /* * bits for selecting i2c direction and values, * used for I2C serial flash */ - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; /* variables for sanity checking interrupt and errors */ - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | + (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | INFINIPATH_HWE_PCIE1PLLFAILED | @@ -871,13 +929,13 @@ static void ipath_init_pe_variables(void) INFINIPATH_HWE_SERDESPLLFAILED | INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | @@ -895,8 +953,8 @@ static void ipath_init_pe_variables(void) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /* setup the MSI stuff again after a reset. I'd like to just call @@ -971,8 +1029,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd) int ret; /* Use ERROR so it shows up in logs, etc. 
*/ - ipath_dev_err(dd, "Resetting PE-800 unit %u\n", - dd->ipath_unit); + ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit); /* keep chip from being accessed in a few places */ dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT); val = dd->ipath_control | INFINIPATH_C_RESET; @@ -1071,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, mmiowb(); spin_unlock_irqrestore(&dd->ipath_tid_lock, flags); } +/** + * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher + * @dd: the infinipath device + * @tidptr: pointer to the expected TID (in chip) to udpate + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing + * + * This exists as a separate routine to allow for selection of the + * appropriate "flavor". The static calls in cleanup just use the + * revision-agnostic form, as they are not performance critical. + */ +static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + + if (pa != dd->ipath_tidinvalid) { + if (pa & ((1U << 11) - 1)) { + dev_info(&dd->pcidev->dev, "BUG: physaddr %lx " + "not 2KB aligned!\n", pa); + return; + } + pa >>= 11; + /* paranoia check */ + if (pa & (7<<29)) + ipath_dev_err(dd, + "BUG: Physical page address 0x%lx " + "has bits set in 31-29\n", pa); + + if (type == 0) + pa |= dd->ipath_tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + if (dd->ipath_kregbase) + writel(pa, tidp32); + mmiowb(); +} + /** * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager @@ -1078,7 +1174,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, * @port: the port * * clear all TID entries for a port, expected and eager. - * Used from ipath_close(). On PE800, TIDs are only 32 bits, + * Used from ipath_close(). On this chip, TIDs are only 32 bits, * not 64, but they are still on 64 bit boundaries, so tidbase * is declared as u64 * for the pointer math, even though we write 32 bits */ @@ -1148,9 +1244,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_flags |= IPATH_4BYTE_TID; /* - * For openib, we need to be able to handle an IB header of 96 bytes - * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. The PE-800 does not + * For openfabrics, we need to be able to handle an IB header of + * 24 dwords. HT chip has arbitrary sized receive buffers, so we + * made them the same size as the PIO buffers. This chip does not * handle arbitrary size buffers, so we need the header large enough * to handle largest IB header, but still have room for a 2KB MTU * standard IB packet. @@ -1158,11 +1254,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrentsize = 24; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - /* For HT-400, we allocate a somewhat overly large eager buffer, - * such that we can guarantee that we can receive the largest packet - * that we can send out. To truly support a 4KB MTU, we need to - * bump this to a larger value. We'll do this when I get around to - * testing 4KB sends on the PE-800, which I have not yet done. + /* + * To truly support a 4KB MTU (for usermode), we need to + * bump this to a larger value. For now, we use them for + * the kernel only. 
*/ dd->ipath_rcvegrbufsize = 2048; /* @@ -1175,9 +1270,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; /* - * For PE-800, we can request a receive interrupt for 1 or + * We can request a receive interrupt for 1 or * more packets from current offset. For now, we set this - * up for a single packet, to match the HT-400 behavior. + * up for a single packet. */ dd->ipath_rhdrhead_intr_off = 1ULL<<32; @@ -1193,7 +1288,7 @@ int __attribute__((weak)) ipath_unordered_wc(void) /** * ipath_init_pe_get_base_info - set chip-specific flags for user code - * @dd: the infinipath device + * @pd: the infinipath port * @kbase: ipath_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs @@ -1202,6 +1297,7 @@ int __attribute__((weak)) ipath_unordered_wc(void) static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) { struct ipath_base_info *kinfo = kbase; + struct ipath_devdata *dd; if (ipath_unordered_wc()) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER; @@ -1210,19 +1306,31 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) else ipath_cdbg(PROC, "Not Intel processor, WC ordered\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; + if (pd == NULL) + goto done; + dd = pd->port_dd; + + if (dd != NULL && dd->ipath_minrev >= 2) { + ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE; + ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN; + } + +done: + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; return 0; } /** - * ipath_init_pe800_funcs - set up the chip-specific function pointers + * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_pe800_funcs(struct ipath_devdata *dd) +void ipath_init_iba6120_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_pe_intconfig; dd->ipath_f_bus = ipath_setup_pe_config; @@ -1234,7 +1342,10 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd) dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes; dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; dd->ipath_f_clear_tids = ipath_pe_clear_tids; - dd->ipath_f_put_tid = ipath_pe_put_tid; + if (dd->ipath_minrev >= 2) + dd->ipath_f_put_tid = ipath_pe_put_tid_2; + else + dd->ipath_f_put_tid = ipath_pe_put_tid; dd->ipath_f_cleanup = ipath_setup_pe_cleanup; dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; @@ -1249,6 +1360,6 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd) dd->ipath_kregs = &ipath_pe_kregs; dd->ipath_cregs = &ipath_pe_cregs; - ipath_init_pe_variables(); + ipath_init_pe_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 414cdd1d80a..d819cca524c 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); /* - * Number of buffers reserved for driver (layered drivers and SMA - * send). Reserved at end of buffer list. Initialized based on + * Number of buffers reserved for driver (verbs and layered drivers.) 
+ * Reserved at end of buffer list. Initialized based on * number of PIO buffers if not set via module interface. * The problem with this is that it's global, but we'll use different * numbers for different chip types. So the default value is not @@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); * * Allocate the eager TID buffers and program them into infinipath. * We use the network layer alloc_skb() allocator to allocate the - * memory, and either use the buffers as is for things like SMA + * memory, and either use the buffers as is for things like verbs * packets, or pass the buffers up to the ipath layered driver and * thence the network layer, replacing them as we do so (see * ipath_rcv_layer()). @@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); static int create_port0_egr(struct ipath_devdata *dd) { unsigned e, egrcnt; - struct sk_buff **skbs; + struct ipath_skbinfo *skbinfo; int ret; egrcnt = dd->ipath_rcvegrcnt; - skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt); - if (skbs == NULL) { + skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); + if (skbinfo == NULL) { ipath_dev_err(dd, "allocation error for eager TID " "skb array\n"); ret = -ENOMEM; @@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd) * 4 bytes so that the data buffer stays word aligned. * See ipath_kreceive() for more details. */ - skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL); - if (!skbs[e]) { + skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); + if (!skbinfo[e].skb) { ipath_dev_err(dd, "SKB allocation error for " "eager TID %u\n", e); while (e != 0) - dev_kfree_skb(skbs[--e]); - vfree(skbs); + dev_kfree_skb(skbinfo[--e].skb); + vfree(skbinfo); ret = -ENOMEM; goto bail; } @@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd) * After loop above, so we can test non-NULL to see if ready * to use at receive, etc. */ - dd->ipath_port0_skbs = skbs; + dd->ipath_port0_skbinfo = skbinfo; for (e = 0; e < egrcnt; e++) { - unsigned long phys = - virt_to_phys(dd->ipath_port0_skbs[e]->data); + dd->ipath_port0_skbinfo[e].phys = + ipath_map_single(dd->pcidev, + dd->ipath_port0_skbinfo[e].skb->data, + dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, phys); + dd->ipath_rcvegrbase), 0, + dd->ipath_port0_skbinfo[e].phys); } ret = 0; @@ -240,7 +243,11 @@ static int init_chip_first(struct ipath_devdata *dd, "only supports %u\n", ipath_cfgports, dd->ipath_portcnt); } - dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports, + /* + * Allocate full portcnt array, rather than just cfgports, because + * cleanup iterates across all possible ports. 
+ */ + dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, GFP_KERNEL); if (!dd->ipath_pd) { @@ -428,16 +435,33 @@ done: */ static void init_shadow_tids(struct ipath_devdata *dd) { - dd->ipath_pageshadow = (struct page **) - vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + struct page **pages; + dma_addr_t *addrs; + + pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * sizeof(struct page *)); - if (!dd->ipath_pageshadow) + if (!pages) { ipath_dev_err(dd, "failed to allocate shadow page * " "array, no expected sends!\n"); - else - memset(dd->ipath_pageshadow, 0, - dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); + dd->ipath_pageshadow = NULL; + return; + } + + addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(dma_addr_t)); + if (!addrs) { + ipath_dev_err(dd, "failed to allocate shadow dma handle " + "array, no expected sends!\n"); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + return; + } + + memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(struct page *)); + + dd->ipath_pageshadow = pages; + dd->ipath_physshadow = addrs; } static void enable_chip(struct ipath_devdata *dd, @@ -446,9 +470,9 @@ static void enable_chip(struct ipath_devdata *dd, u32 val; int i; - if (!reinit) { - init_waitqueue_head(&ipath_sma_state_wait); - } + if (!reinit) + init_waitqueue_head(&ipath_state_wait); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -687,7 +711,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); if (ipath_kpiobufs == 0) { - /* not set by user, or set explictly to default */ + /* not set by user (this is default) */ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) kpiobufs = 32; else @@ -946,6 +970,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; } + ipath_kpiobufs = val; ret = 0; bail: spin_unlock_irqrestore(&ipath_devs_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 280e732660a..6bee53ce5f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -34,9 +34,53 @@ #include <linux/pci.h> #include "ipath_kernel.h" -#include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. 
+ */ +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +{ + u32 piobcnt; + unsigned long sbuf[4]; + /* + * it's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling + */ + piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + /* read these before writing errorclear */ + sbuf[0] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror); + sbuf[1] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 1); + if (piobcnt > 128) { + sbuf[2] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 2); + sbuf[3] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 3); + } + + if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { + int i; + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { + __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, + "SendbufErrs %lx %lx", sbuf[0], + sbuf[1]); + if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) + printk(" %lx %lx ", sbuf[2], sbuf[3]); + printk("\n"); + } + + for (i = 0; i < piobcnt; i++) + if (test_bit(i, sbuf)) + ipath_disarm_piobufs(dd, i, 1); + dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ + } +} + + /* These are all rcv-related errors which we want to count for stats */ #define E_SUM_PKTERRS \ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ @@ -68,53 +112,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { - unsigned long sbuf[4]; u64 ignore_this_time = 0; - u32 piobcnt; - - /* if possible that sendbuffererror could be valid */ - piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* read these before writing errorclear */ - sbuf[0] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror); - sbuf[1] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 1); - if (piobcnt > 128) { - sbuf[2] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 2); - sbuf[3] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 3); - } - - if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { - int i; - - ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]); - if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) - printk("%lx %lx ", sbuf[2], sbuf[3]); - for (i = 0; i < piobcnt; i++) { - if (test_bit(i, sbuf)) { - u32 __iomem *piobuf; - if (i < dd->ipath_piobcnt2k) - piobuf = (u32 __iomem *) - (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - piobuf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * - dd->ipath_4kalign); - - ipath_cdbg(PKT, - "PIObuf[%u] @%p pbc is %x; ", - i, piobuf, readl(piobuf)); - ipath_disarm_piobufs(dd, i, 1); - } - } - if (ipath_debug & __IPATH_PKTDBG) - printk("\n"); - } + ipath_disarm_senderrbufs(dd); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) return ignore_this_time; } +/* generic hw error messages... 
*/ +#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ + .msg = "TXE " #a " Memory Parity" \ + } +#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ + .msg = "RXE " #a " Memory Parity" \ + } + +static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), + INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), + + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), + + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), +}; + +/** + * ipath_format_hwmsg - format a single hwerror message + * @msg message buffer + * @msgl length of message buffer + * @hwmsg message to add to message buffer + */ +static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * ipath_format_hwerrors - format hardware error messages for display + * @hwerrs hardware errors bit vector + * @hwerrmsgs hardware error descriptions + * @nhwerrmsgs number of hwerrmsgs + * @msg message buffer + * @msgl message buffer length + */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t msgl) +{ + int i; + const int glen = + sizeof(ipath_generic_hwerror_msgs) / + sizeof(ipath_generic_hwerror_msgs[0]); + + for (i=0; i<glen; i++) { + if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { + ipath_format_hwmsg(msg, msgl, + ipath_generic_hwerror_msgs[i].msg); + } + } + + for (i=0; i<nhwerrmsgs; i++) { + if (hwerrs & hwerrmsgs[i].mask) { + ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); + } + } +} + /* return the strings for the most common link states */ static char *ib_linkstate(u32 linkstate) { @@ -201,7 +277,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, ib_linkstate(lstate)); } else - ipath_cdbg(SMA, "Unit %u link state %s, last " + ipath_cdbg(VERBOSE, "Unit %u link state %s, last " "was %s\n", dd->ipath_unit, ib_linkstate(lstate), ib_linkstate((unsigned) @@ -213,7 +289,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || lstate == IPATH_IBSTATE_ACTIVE) - ipath_cdbg(SMA, "Unit %u link state down" + ipath_cdbg(VERBOSE, "Unit %u link state down" " (state 0x%x), from %s\n", dd->ipath_unit, (u32)val & IPATH_IBSTATE_MASK, @@ -269,7 +345,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, INFINIPATH_IBCS_LINKSTATE_MASK) == INFINIPATH_IBCS_L_STATE_ACTIVE) /* if from up to down be more vocal */ - ipath_cdbg(SMA, + ipath_cdbg(VERBOSE, "Unit %u link now down (%s)\n", dd->ipath_unit, ipath_ibcstatus_str[ltstate]); @@ -289,8 +365,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, *dd->ipath_statusp |= IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; dd->ipath_f_setextled(dd, lstate, ltstate); - - __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP); } else if ((val & IPATH_IBSTATE_MASK) == 
IPATH_IBSTATE_INIT) { /* * set INIT and DOWN. Down is checked by most of the other @@ -406,10 +480,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); } - if (!noprint && (errs & ~infinipath_e_bitsextant)) + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) ipath_dev_err(dd, "error interrupt with unknown errors " "%llx set\n", (unsigned long long) - (errs & ~infinipath_e_bitsextant)); + (errs & ~dd->ipath_e_bitsextant)); if (errs & E_SUM_ERRS) ignore_this_time = handle_e_sum_errs(dd, errs); @@ -480,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ~(INFINIPATH_E_HARDWARE | INFINIPATH_E_IBSTATUSCHANGED); } + + /* likely due to cancel, so suppress */ + if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && + dd->ipath_lastcancel > jiffies) { + ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n"); + errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); + } + if (!errs) return 0; @@ -531,7 +613,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * don't report same point multiple times, * except kernel */ - tl = (u32) * pd->port_rcvhdrtail_kvaddr; + tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; if (tl == dd->ipath_lastrcvhdrqtails[i]) continue; hd = ipath_read_ureg32(dd, ur_rcvhdrhead, @@ -598,11 +680,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (!noprint && *msg) ipath_dev_err(dd, "%s error\n", msg); - if (dd->ipath_sma_state_wanted & dd->ipath_flags) { - ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, " - "waking\n", dd->ipath_sma_state_wanted, + if (dd->ipath_state_wanted & dd->ipath_flags) { + ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " + "waking\n", dd->ipath_state_wanted, dd->ipath_flags); - wake_up_interruptible(&ipath_sma_state_wait); + wake_up_interruptible(&ipath_state_wait); } return chkerrpkts; @@ -708,11 +790,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) { int ret; - ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - if (ret > 0) - goto set; - - ret = __ipath_verbs_piobufavail(dd); + ret = ipath_ib_piobufavail(dd->verbs_dev); if (ret > 0) goto set; @@ -735,9 +813,9 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) int rcvdint = 0; portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & - infinipath_i_rcvavail_mask) + dd->ipath_i_rcvavail_mask) | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & - infinipath_i_rcvurg_mask); + dd->ipath_i_rcvurg_mask); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && @@ -814,7 +892,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (oldhead != curtail) { if (dd->ipath_flags & IPATH_GPIO_INTR) { ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + (u64) (1 << IPATH_GPIO_PORT0_BIT)); istat = port0rbits | INFINIPATH_I_GPIO; } else @@ -844,10 +922,10 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (unexpected) unexpected = 0; - if (unlikely(istat & ~infinipath_i_bitsextant)) + if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %x set\n", - istat & (u32) ~ infinipath_i_bitsextant); + istat & (u32) ~ dd->ipath_i_bitsextant); else ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); @@ -873,26 +951,80 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (istat & INFINIPATH_I_GPIO) { /* - * Packets are 
available in the port 0 rcv queue. - * Eventually this needs to be generalized to check - * IPATH_GPIO_INTR, and the specific GPIO bit, if - * GPIO interrupts are used for anything else. + * GPIO interrupts fall in two broad classes: + * GPIO_2 indicates (on some HT4xx boards) that a packet + * has arrived for Port 0. Checking for this + * is controlled by flag IPATH_GPIO_INTR. + * GPIO_3..5 on IBA6120 Rev2 chips indicate errors + * that we need to count. Checking for this + * is controlled by flag IPATH_GPIO_ERRINTRS. */ - if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) { - u32 gpiostatus; - gpiostatus = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_status); - ipath_dbg("Unexpected GPIO interrupt bits %x\n", - gpiostatus); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - gpiostatus); + u32 gpiostatus; + u32 to_clear = 0; + + gpiostatus = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_status); + /* First the error-counter case. + */ + if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && + (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". + */ + if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { + ipath_dbg("FlowCtl on UnsupVL\n"); + dd->ipath_rxfc_unsupvl_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { + ipath_dbg("Overrun Threshold exceeded\n"); + dd->ipath_overrun_thresh_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { + ipath_dbg("Local Link Integrity error\n"); + dd->ipath_lli_errs++; + } + gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; } - else { - /* Clear GPIO status bit 2 */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + /* Now the Port0 Receive case */ + if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && + (dd->ipath_flags & IPATH_GPIO_INTR)) { + /* + * GPIO status bit 2 is set, and we expected it. + * clear it and indicate in p0bits. + * This probably only happens if a Port0 pkt + * arrives at _just_ the wrong time, and we + * handle that by seting chk0rcv; + */ + to_clear |= (1 << IPATH_GPIO_PORT0_BIT); + gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); chk0rcv = 1; } + if (unlikely(gpiostatus)) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * MEA: this is almost certainly non-ideal. + * we should look into auto-disable of unexpected + * GPIO interrupts, possibly on a "three strikes" + * basis. 
+ */ + u32 mask; + mask = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_mask); + if (mask & gpiostatus) { + ipath_dbg("Unexpected GPIO IRQ bits %x\n", + gpiostatus & mask); + to_clear |= (gpiostatus & mask); + } + } + if (to_clear) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, + (u64) to_clear); + } } chk0rcv |= istat & port0rbits; @@ -917,9 +1049,9 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) istat &= ~port0rbits; } - if (istat & ((infinipath_i_rcvavail_mask << + if (istat & ((dd->ipath_i_rcvavail_mask << INFINIPATH_I_RCVAVAIL_SHIFT) - | (infinipath_i_rcvurg_mask << + | (dd->ipath_i_rcvurg_mask << INFINIPATH_I_RCVURG_SHIFT))) handle_urcv(dd, istat); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e9f374fb641..d7540b71b45 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -39,6 +39,8 @@ */ #include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> #include <asm/io.h> #include "ipath_common.h" @@ -62,7 +64,7 @@ struct ipath_portdata { /* rcvhdrq base, needs mmap before useful */ void *port_rcvhdrq; /* kernel virtual address where hdrqtail is updated */ - volatile __le64 *port_rcvhdrtail_kvaddr; + void *port_rcvhdrtail_kvaddr; /* * temp buffer for expected send setup, allocated at open, instead * of each setup call @@ -79,8 +81,8 @@ struct ipath_portdata { dma_addr_t port_rcvhdrq_phys; dma_addr_t port_rcvhdrqtailaddr_phys; /* - * number of opens on this instance (0 or 1; ignoring forks, dup, - * etc. for now) + * number of opens (including slave subports) on this instance + * (ignoring forks, dup, etc. for now) */ int port_cnt; /* @@ -89,6 +91,10 @@ struct ipath_portdata { */ /* instead of calculating it */ unsigned port_port; + /* non-zero if port is being shared. */ + u16 port_subport_cnt; + /* non-zero if port is being shared. 
*/ + u16 port_subport_id; /* chip offset of PIO buffers for this port */ u32 port_piobufs; /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ @@ -121,6 +127,16 @@ struct ipath_portdata { u16 port_pkeys[4]; /* so file ops can get at unit */ struct ipath_devdata *port_dd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subport_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subport_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subport_rcvhdr_base; + /* The version of the library which opened this port */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; }; struct sk_buff; @@ -132,10 +148,9 @@ struct _ipath_layer { void *l_arg; }; -/* Verbs layer interface */ -struct _verbs_layer { - void *l_arg; - struct timer_list l_timer; +struct ipath_skbinfo { + struct sk_buff *skb; + dma_addr_t phys; }; struct ipath_devdata { @@ -160,7 +175,7 @@ struct ipath_devdata { /* ipath_cfgports pointers */ struct ipath_portdata **ipath_pd; /* sk_buffs used by port 0 eager receive queue */ - struct sk_buff **ipath_port0_skbs; + struct ipath_skbinfo *ipath_port0_skbinfo; /* kvirt address of 1st 2k pio buffer */ void __iomem *ipath_pio2kbase; /* kvirt address of 1st 4k pio buffer */ @@ -198,7 +213,8 @@ struct ipath_devdata { void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); /* fill out chip-specific fields */ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); - struct _verbs_layer verbs_layer; + struct ipath_ibdev *verbs_dev; + struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ u64 ipath_sword; /* total dwords rcvd (summed from counter) */ @@ -241,7 +257,7 @@ struct ipath_devdata { u64 ipath_tidtemplate; /* value to write to free TIDs */ u64 ipath_tidinvalid; - /* PE-800 rcv interrupt setup */ + /* IBA6120 rcv interrupt setup */ u64 ipath_rhdrhead_intr_off; /* size of memory at ipath_kregbase */ @@ -250,8 +266,8 @@ struct ipath_devdata { u32 ipath_pioavregs; /* IPATH_POLL, etc. */ u32 ipath_flags; - /* ipath_flags sma is waiting for */ - u32 ipath_sma_state_wanted; + /* ipath_flags driver is waiting for */ + u32 ipath_state_wanted; /* last buffer for user use, first buf for kernel use is this * index. 
*/ u32 ipath_lastport_piobuf; @@ -311,10 +327,6 @@ struct ipath_devdata { u32 ipath_pcibar0; /* so we can rewrite it after a chip reset */ u32 ipath_pcibar1; - /* sequential tries for SMA send and no bufs */ - u32 ipath_nosma_bufs; - /* duration (seconds) ipath_nosma_bufs set */ - u32 ipath_nosma_secs; /* HT/PCI Vendor ID (here for NodeInfo) */ u16 ipath_vendorid; @@ -324,12 +336,16 @@ struct ipath_devdata { u8 ipath_ht_slave_off; /* for write combining settings */ unsigned long ipath_wc_cookie; + unsigned long ipath_wc_base; + unsigned long ipath_wc_len; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; /* shadow copy of all exptids physaddr; used only by funcsim */ u64 *ipath_tidsimshadow; /* shadow copy of struct page *'s for exp tid pages */ struct page **ipath_pageshadow; + /* shadow copy of dma handles for exp tid pages */ + dma_addr_t *ipath_physshadow; /* lock to workaround chip bug 9437 */ spinlock_t ipath_tid_lock; @@ -411,6 +427,9 @@ struct ipath_devdata { unsigned long ipath_rcvctrl; /* shadow kr_sendctrl */ unsigned long ipath_sendctrl; + /* ports waiting for PIOavail intr */ + unsigned long ipath_portpiowait; + unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ /* value we put in kr_rcvhdrcnt */ u32 ipath_rcvhdrcnt; @@ -474,8 +493,6 @@ struct ipath_devdata { u32 ipath_htwidth; /* HT speed (200,400,800,1000) from HT config */ u32 ipath_htspeed; - /* ports waiting for PIOavail intr */ - unsigned long ipath_portpiowait; /* * number of sequential ibcstatus change for polling active/quiet * (i.e., link not coming up). @@ -512,34 +529,64 @@ struct ipath_devdata { u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + /* Rx Polarity inversion (compensate for ~tx on partner) */ + u8 ipath_rx_pol_inv; /* local link integrity counter */ u32 ipath_lli_counter; /* local link integrity errors */ u32 ipath_lli_errors; + /* + * Above counts only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of kern-packets. + * Below are the three (monotonically increasing) counters + * maintained via GPIO interrupts on iba6120-rev2. + */ + u32 ipath_rxfc_unsupvl_errs; + u32 ipath_overrun_thresh_errs; + u32 ipath_lli_errs; + + /* + * Not all devices managed by a driver instance are the same + * type, so these fields must be per-device. + */ + u64 ipath_i_bitsextant; + ipath_err_t ipath_e_bitsextant; + ipath_err_t ipath_hwe_bitsextant; + + /* + * Below should be computable from number of ports, + * since they are never modified. + */ + u32 ipath_i_rcvavail_mask; + u32 ipath_i_rcvurg_mask; + + /* + * Register bits for selecting i2c direction and values, used for + * I2C serial flash. 
+ */ + u16 ipath_gpio_sda_num; + u16 ipath_gpio_scl_num; + u64 ipath_gpio_sda; + u64 ipath_gpio_scl; }; +/* Private data for file operations */ +struct ipath_filedata { + struct ipath_portdata *pd; + unsigned subport; + unsigned tidcursor; +}; extern struct list_head ipath_dev_list; extern spinlock_t ipath_devs_lock; extern struct ipath_devdata *ipath_lookup(int unit); -extern u16 ipath_layer_rcv_opcode; -extern int __ipath_layer_intr(struct ipath_devdata *, u32); -extern int ipath_layer_intr(struct ipath_devdata *, u32); -extern int __ipath_layer_rcv(struct ipath_devdata *, void *, - struct sk_buff *); -extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *); -extern int __ipath_verbs_piobufavail(struct ipath_devdata *); -extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32); - -void ipath_layer_add(struct ipath_devdata *); -void ipath_layer_remove(struct ipath_devdata *); - int ipath_init_chip(struct ipath_devdata *, int); int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); +void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, struct file_operations *fops, @@ -549,9 +596,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp, int ipath_diag_add(struct ipath_devdata *); void ipath_diag_remove(struct ipath_devdata *); -void ipath_diag_bringup_link(struct ipath_devdata *); -extern wait_queue_head_t ipath_sma_state_wait; +extern wait_queue_head_t ipath_state_wait; int ipath_user_add(struct ipath_devdata *dd); void ipath_user_remove(struct ipath_devdata *dd); @@ -582,15 +628,21 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); int ipath_parse_ushort(const char *str, unsigned short *valp); -int ipath_wait_linkstate(struct ipath_devdata *, u32, int); -void ipath_set_ib_lstate(struct ipath_devdata *, int); void ipath_kreceive(struct ipath_devdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); +int ipath_set_linkstate(struct ipath_devdata *, u8); +int ipath_set_mtu(struct ipath_devdata *, u16); +int ipath_set_lid(struct ipath_devdata *, u32, u8); +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* for use in system calls, where we want to know device type, etc. 
*/ -#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) +#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd +#define subport_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->subport +#define tidcursor_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->tidcursor /* * values for ipath_flags @@ -630,6 +682,15 @@ void ipath_get_faststats(unsigned long); /* can miss port0 rx interrupts */ #define IPATH_POLL_RX_INTR 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ + /* Use GPIO interrupts for new counters */ +#define IPATH_GPIO_ERRINTRS 0x100000 + +/* Bits in GPIO for the added interrupts */ +#define IPATH_GPIO_PORT0_BIT 2 +#define IPATH_GPIO_RXUVL_BIT 3 +#define IPATH_GPIO_OVRUN_BIT 4 +#define IPATH_GPIO_LLI_BIT 5 +#define IPATH_GPIO_ERRINTR_MASK 0x38 /* portdata flag bit offsets */ /* waiting for a packet to arrive */ @@ -642,10 +703,8 @@ void ipath_free_data(struct ipath_portdata *dd); int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); -/* init PE-800-specific func */ -void ipath_init_pe800_funcs(struct ipath_devdata *); -/* init HT-400-specific func */ -void ipath_init_ht400_funcs(struct ipath_devdata *); +void ipath_init_iba6120_funcs(struct ipath_devdata *); +void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); @@ -801,7 +860,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, struct device_driver; -extern const char ipath_core_version[]; +extern const char ib_ipath_version[]; int ipath_driver_create_group(struct device_driver *); void ipath_driver_remove_group(struct device_driver *); @@ -810,12 +869,22 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); int ipath_expose_reset(struct device *); +int ipath_diagpkt_add(void); +void ipath_diagpkt_remove(void); + int ipath_init_ipathfs(void); void ipath_exit_ipathfs(void); int ipathfs_add_device(struct ipath_devdata *); int ipathfs_remove_device(struct ipath_devdata *); /* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); + +/* * Flush write combining store buffers (if present) and perform a write * barrier. */ @@ -831,10 +900,10 @@ const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; -#define IPATH_DRV_NAME "ipath_core" +#define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 -#define IPATH_SMA_MINOR 128 +#define IPATH_DIAGPKT_MINOR 127 #define IPATH_DIAG_MINOR_BASE 129 #define IPATH_NMINORS 255 @@ -872,4 +941,20 @@ extern struct mutex ipath_mutex; #endif /* _IPATH_DEBUGGING */ +/* + * this is used for formatting hw error messages... + */ +struct ipath_hwerror_msgs { + u64 mask; + const char *msg; +}; + +#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } + +/* in ipath_intr.c... 
*/ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t lmsg); + #endif /* _IPATH_KERNEL_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index a5ca279370a..9a6cbd05adc 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -34,6 +34,7 @@ #include <asm/io.h> #include "ipath_verbs.h" +#include "ipath_kernel.h" /** * ipath_alloc_lkey - allocate an lkey @@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - _VERBS_INFO("LKEY table full\n"); + ipath_dbg(KERN_INFO "LKEY table full\n"); ret = 0; goto bail; } @@ -117,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) * Check the IB SGE for validity and initialize our internal version * of it. */ -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc) { + struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct ipath_mregion *mr; unsigned n, m; size_t off; @@ -139,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, goto bail; } mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey)) { + if (unlikely(mr == NULL || mr->lkey != sge->lkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } @@ -187,9 +190,10 @@ bail: * * Return 1 if successful, otherwise 0. */ -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc) { + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_lkey_table *rkt = &dev->lk_table; struct ipath_sge *sge = &ss->sge; struct ipath_mregion *mr; @@ -213,7 +217,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, } mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey)) { + if (unlikely(mr == NULL || mr->lkey != rkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index b28c6f81c73..e46aa4ed2a7 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -42,26 +42,20 @@ #include "ipath_kernel.h" #include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" /* Acquire before ipath_devs_lock. 
*/ static DEFINE_MUTEX(ipath_layer_mutex); -static int ipath_verbs_registered; - u16 ipath_layer_rcv_opcode; static int (*layer_intr)(void *, u32); static int (*layer_rcv)(void *, void *, struct sk_buff *); static int (*layer_rcv_lid)(void *, void *); -static int (*verbs_piobufavail)(void *); -static void (*verbs_rcv)(void *, void *, void *, u32); static void *(*layer_add_one)(int, struct ipath_devdata *); static void (*layer_remove_one)(void *); -static void *(*verbs_add_one)(int, struct ipath_devdata *); -static void (*verbs_remove_one)(void *); -static void (*verbs_timer_cb)(void *); int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) { @@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) return ret; } -int __ipath_verbs_piobufavail(struct ipath_devdata *dd) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_piobufavail) - ret = verbs_piobufavail(dd->verbs_layer.l_arg); - - return ret; -} - -int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf, - u32 tlen) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_rcv) { - verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen); - ret = 0; - } - - return ret; -} - -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate) +void ipath_layer_lid_changed(struct ipath_devdata *dd) { - u32 lstate; - int ret; - - switch (newstate) { - case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, - INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKINIT: - if (dd->ipath_flags & IPATH_LINKINIT) { - ret = 0; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKINIT; - break; - - case IPATH_IB_LINKARM: - if (dd->ipath_flags & IPATH_LINKARMED) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & - (IPATH_LINKINIT | IPATH_LINKACTIVE))) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << - INFINIPATH_IBCC_LINKCMD_SHIFT); - /* - * Since the port can transition to ACTIVE by receiving - * a non VL 15 packet, wait for either state. - */ - lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINKACTIVE: - if (dd->ipath_flags & IPATH_LINKACTIVE) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & IPATH_LINKARMED)) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKACTIVE; - break; - - default: - ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); - ret = -EINVAL; - goto bail; - } - ret = ipath_wait_linkstate(dd, lstate, 2000); - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate); - -/** - * ipath_layer_set_mtu - set the MTU - * @dd: the infinipath device - * @arg: the new MTU - * - * we can handle "any" incoming size, the issue here is whether we - * need to restrict our outgoing size. For now, we don't do any - * sanity checking on this, and we don't deal with what happens to - * programs that are already running when the size changes. 
- * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... - */ -int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg) -{ - u32 piosize; - int changed = 0; - int ret; - - /* - * mtu is IB data payload max. It's the largest power of 2 less - * than piosize (or even larger, since it only really controls the - * largest we can receive; we can send the max of the mtu and - * piosize). We check that it's one of the valid IB sizes. - */ - if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - arg != 4096) { - ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); - ret = -EINVAL; - goto bail; - } - if (dd->ipath_ibmtu == arg) { - ret = 0; /* same as current */ - goto bail; - } - - piosize = dd->ipath_ibmaxlen; - dd->ipath_ibmtu = arg; - - if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { - /* Only if it's not the initial value (or reset to it) */ - if (piosize != dd->ipath_init_ibmaxlen) { - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { - piosize = arg + IPATH_PIO_MAXIBHDR; - ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " - "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, - arg); - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - - if (changed) { - /* - * set the IBC maxpktlength to the size of our pio - * buffers in words - */ - u64 ibc = dd->ipath_ibcctrl; - ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - INFINIPATH_IBCC_MAXPKTLEN_SHIFT); - - piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ - dd->ipath_ibmaxlen = piosize; - piosize /= sizeof(u32); /* in words */ - /* - * for ICRC, which we only send in diag test pkt mode, and - * we don't need to worry about that for mtu - */ - piosize += 1; - - ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; - dd->ipath_ibcctrl = ibc; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - dd->ipath_f_tidtemplate(dd); - } - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_mtu); - -int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) -{ - dd->ipath_lid = arg; - dd->ipath_lmc = lmc; - mutex_lock(&ipath_layer_mutex); if (dd->ipath_layer.l_arg && layer_intr) layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); mutex_unlock(&ipath_layer_mutex); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_set_lid); - -int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) -{ - /* XXX - need to inform anyone who cares this just happened. 
*/ - dd->ipath_guid = guid; - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_guid); - -__be64 ipath_layer_get_guid(struct ipath_devdata *dd) -{ - return dd->ipath_guid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_guid); - -u32 ipath_layer_get_nguid(struct ipath_devdata *dd) -{ - return dd->ipath_nguid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_nguid); - -u32 ipath_layer_get_majrev(struct ipath_devdata *dd) -{ - return dd->ipath_majrev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_majrev); - -u32 ipath_layer_get_minrev(struct ipath_devdata *dd) -{ - return dd->ipath_minrev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_minrev); - -u32 ipath_layer_get_pcirev(struct ipath_devdata *dd) -{ - return dd->ipath_pcirev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev); - -u32 ipath_layer_get_flags(struct ipath_devdata *dd) -{ - return dd->ipath_flags; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_flags); - -struct device *ipath_layer_get_device(struct ipath_devdata *dd) -{ - return &dd->pcidev->dev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_device); - -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd) -{ - return dd->ipath_deviceid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid); - -u32 ipath_layer_get_vendorid(struct ipath_devdata *dd) -{ - return dd->ipath_vendorid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid); - -u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd) -{ - return dd->ipath_lastibcstat; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat); - -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd) -{ - return dd->ipath_ibmtu; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu); - void ipath_layer_add(struct ipath_devdata *dd) { mutex_lock(&ipath_layer_mutex); @@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd) dd->ipath_layer.l_arg = layer_add_one(dd->ipath_unit, dd); - if (verbs_add_one) - dd->verbs_layer.l_arg = - verbs_add_one(dd->ipath_unit, dd); - mutex_unlock(&ipath_layer_mutex); } @@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd) dd->ipath_layer.l_arg = NULL; } - if (dd->verbs_layer.l_arg && verbs_remove_one) { - verbs_remove_one(dd->verbs_layer.l_arg); - dd->verbs_layer.l_arg = NULL; - } - mutex_unlock(&ipath_layer_mutex); } @@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), if (dd->ipath_layer.l_arg) continue; - if (!(*dd->ipath_statusp & IPATH_STATUS_SMA)) - *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA; - spin_unlock_irqrestore(&ipath_devs_lock, flags); dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); spin_lock_irqsave(&ipath_devs_lock, flags); @@ -509,107 +205,6 @@ void ipath_layer_unregister(void) EXPORT_SYMBOL_GPL(ipath_layer_unregister); -static void __ipath_verbs_timer(unsigned long arg) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) arg; - - /* - * If port 0 receive packet interrupts are not available, or - * can be missed, poll the receive queue - */ - if (dd->ipath_flags & IPATH_POLL_RX_INTR) - ipath_kreceive(dd); - - /* Handle verbs layer timeouts. 
*/ - if (dd->verbs_layer.l_arg && verbs_timer_cb) - verbs_timer_cb(dd->verbs_layer.l_arg); - - mod_timer(&dd->verbs_layer.l_timer, jiffies + 1); -} - -/** - * ipath_verbs_register - verbs layer registration - * @l_piobufavail: callback for when PIO buffers become available - * @l_rcv: callback for receiving a packet - * @l_timer_cb: timer callback - * @ipath_devdata: device data structure is put here - */ -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail) (void *arg), - void (*l_rcv) (void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb) (void *arg)) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - - verbs_add_one = l_add; - verbs_remove_one = l_remove; - verbs_piobufavail = l_piobufavail; - verbs_rcv = l_rcv; - verbs_timer_cb = l_timer_cb; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - if (!(dd->ipath_flags & IPATH_INITTED)) - continue; - - if (dd->verbs_layer.l_arg) - continue; - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd); - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - mutex_unlock(&ipath_layer_mutex); - - ipath_verbs_registered = 1; - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_verbs_register); - -void ipath_verbs_unregister(void) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - - if (dd->verbs_layer.l_arg && verbs_remove_one) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - verbs_remove_one(dd->verbs_layer.l_arg); - spin_lock_irqsave(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = NULL; - } - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - verbs_add_one = NULL; - verbs_remove_one = NULL; - verbs_piobufavail = NULL; - verbs_rcv = NULL; - verbs_timer_cb = NULL; - - ipath_verbs_registered = 0; - - mutex_unlock(&ipath_layer_mutex); -} - -EXPORT_SYMBOL_GPL(ipath_verbs_unregister); - int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) { int ret; @@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd) EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd) -{ - return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey); - -static void update_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - sge->vaddr += length; - sge->length -= length; - sge->sge_length -= length; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - return; - sge->n = 0; - } - sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = sge->mr->map[sge->m]->segs[sge->n].length; - } -} - -#ifdef __LITTLE_ENDIAN -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); - data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} 
-#else -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); - data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#endif - -static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, - u32 length) -{ - u32 extra = 0; - u32 data = 0; - u32 last; - - while (1) { - u32 len = ss->sge.length; - u32 off; - - BUG_ON(len == 0); - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; - /* If the source address is not aligned, try to align it. */ - off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); - if (off) { - u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & - ~(sizeof(u32) - 1)); - u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); - u32 y; - - y = sizeof(u32) - off; - if (len > y) - len = y; - if (len + extra >= sizeof(u32)) { - data |= set_upper_bits(v, extra * - BITS_PER_BYTE); - len = sizeof(u32) - extra; - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, len, extra); - if (len == length) { - last = data; - break; - } - extra += len; - } - } else if (extra) { - /* Source address is aligned. */ - u32 *addr = (u32 *) ss->sge.vaddr; - int shift = extra * BITS_PER_BYTE; - int ushift = 32 - shift; - u32 l = len; - - while (l >= sizeof(u32)) { - u32 v = *addr; - - data |= set_upper_bits(v, shift); - __raw_writel(data, piobuf); - data = get_upper_bits(v, ushift); - piobuf++; - addr++; - l -= sizeof(u32); - } - /* - * We still have 'extra' number of bytes leftover. - */ - if (l) { - u32 v = *addr; - - if (l + extra >= sizeof(u32)) { - data |= set_upper_bits(v, shift); - len -= l + extra - sizeof(u32); - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, l, - extra); - if (len == length) { - last = data; - break; - } - extra += l; - } - } else if (len == length) { - last = data; - break; - } - } else if (len == length) { - u32 w; - - /* - * Need to round up for the last dword in the - * packet. - */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); - piobuf += w - 1; - last = ((u32 *) ss->sge.vaddr)[w - 1]; - break; - } else { - u32 w = len >> 2; - - __iowrite32_copy(piobuf, ss->sge.vaddr, w); - piobuf += w; - - extra = len & (sizeof(u32) - 1); - if (extra) { - u32 v = ((u32 *) ss->sge.vaddr)[w]; - - /* Clear unused upper bytes */ - data = clear_upper_bytes(v, extra, 0); - } - } - update_sge(ss, len); - length -= len; - } - /* Update address before sending packet. */ - update_sge(ss, length); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(last, piobuf); - /* be sure trigger word is written */ - ipath_flush_wc(); -} - -/** - * ipath_verbs_send - send a packet from the verbs layer - * @dd: the infinipath device - * @hdrwords: the number of words in the header - * @hdr: the packet header - * @len: the length of the packet in bytes - * @ss: the SGE to send - * - * This is like ipath_sma_send_pkt() in that we need to be able to send - * packets after the chip is initialized (MADs) but also like - * ipath_layer_send_hdr() since its used by the verbs layer. 
- */ -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 *hdr, u32 len, struct ipath_sge_state *ss) -{ - u32 __iomem *piobuf; - u32 plen; - int ret; - - /* +1 is for the qword padding of pbc */ - plen = hdrwords + ((len + 3) >> 2) + 1; - if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); - ret = -EINVAL; - goto bail; - } - - /* Get a PIO buffer to use. */ - piobuf = ipath_getpiobuf(dd, NULL); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto bail; - } - - /* - * Write len to control qword, no flags. - * We have to flush after the PBC for correctness on some cpus - * or WC buffer can be written out of order. - */ - writeq(plen, piobuf); - ipath_flush_wc(); - piobuf += 2; - if (len == 0) { - /* - * If there is just the header portion, must flush before - * writing last word of header for correctness, and after - * the last header word (trigger word). - */ - __iowrite32_copy(piobuf, hdr, hdrwords - 1); - ipath_flush_wc(); - __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); - ipath_flush_wc(); - ret = 0; - goto bail; - } - - __iowrite32_copy(piobuf, hdr, hdrwords); - piobuf += hdrwords; - - /* The common case is aligned and contained in one segment. */ - if (likely(ss->num_sge == 1 && len <= ss->sge.length && - !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { - u32 w; - u32 *addr = (u32 *) ss->sge.vaddr; - - /* Update address before sending packet. */ - update_sge(ss, len); - /* Need to round up for the last dword in the packet. */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, addr, w - 1); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(addr[w - 1], piobuf + w - 1); - /* be sure trigger word is written */ - ipath_flush_wc(); - ret = 0; - goto bail; - } - copy_io(piobuf, ss, len); - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_verbs_send); - -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters); - -/** - * ipath_layer_get_counters - get various chip counters - * @dd: the infinipath device - * @cntrs: counters are placed here - * - * Return the counters needed by recv_pma_get_portcounters(). - */ -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - cntrs->symbol_error_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); - cntrs->link_error_recovery_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); - /* - * The link downed counter counts when the other side downs the - * connection. We add in the number of times we downed the link - * due to local link integrity errors to compensate. 
- */ - cntrs->link_downed_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); - cntrs->port_rcv_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); - cntrs->port_rcv_remphys_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); - cntrs->port_xmit_discards = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); - cntrs->port_xmit_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - cntrs->port_rcv_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - cntrs->port_xmit_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - cntrs->port_rcv_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_counters); - -int ipath_layer_want_buffer(struct ipath_devdata *dd) -{ - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_want_buffer); - int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) { int ret = 0; @@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd) } EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); - -int ipath_layer_enable_timer(struct ipath_devdata *dd) -{ - /* - * HT-400 has a design flaw where the chip and kernel idea - * of the tail register don't always agree, and therefore we won't - * get an interrupt on the next packet received. - * If the board supports per packet receive interrupts, use it. - * Otherwise, the timer function periodically checks for packets - * to cover this case. - * Either way, the timer is needed for verbs layer related - * processing. 
- */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, - 0x2074076542310ULL); - /* Enable GPIO bit 2 interrupt */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - (u64) (1 << 2)); - } - - init_timer(&dd->verbs_layer.l_timer); - dd->verbs_layer.l_timer.function = __ipath_verbs_timer; - dd->verbs_layer.l_timer.data = (unsigned long)dd; - dd->verbs_layer.l_timer.expires = jiffies + 1; - add_timer(&dd->verbs_layer.l_timer); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_enable_timer); - -int ipath_layer_disable_timer(struct ipath_devdata *dd) -{ - /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); - - del_timer_sync(&dd->verbs_layer.l_timer); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_disable_timer); - -/** - * ipath_layer_set_verbs_flags - set the verbs layer flags - * @dd: the infinipath device - * @flags: the flags to set - */ -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags) -{ - struct ipath_devdata *ss; - unsigned long lflags; - - spin_lock_irqsave(&ipath_devs_lock, lflags); - - list_for_each_entry(ss, &ipath_dev_list, ipath_list) { - if (!(ss->ipath_flags & IPATH_INITTED)) - continue; - if ((flags & IPATH_VERBS_KERNEL_SMA) && - !(*ss->ipath_statusp & IPATH_STATUS_SMA)) - *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; - else - *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - } - - spin_unlock_irqrestore(&ipath_devs_lock, lflags); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags); - -/** - * ipath_layer_get_npkeys - return the size of the PKEY table for port 0 - * @dd: the infinipath device - */ -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd) -{ - return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys); - -/** - * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table - * @dd: the infinipath device - * @index: the PKEY index - */ -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index) -{ - unsigned ret; - - if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) - ret = 0; - else - ret = dd->ipath_pd[0]->port_pkeys[index]; - - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pkey); - -/** - * ipath_layer_get_pkeys - return the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the pkey table is placed here - */ -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd = dd->ipath_pd[0]; - - memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys); - -/** - * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the infinipath device - * @key: the PKEY index - * - * Return true if this was the last reference and the hardware table entry - * needs to be changed. - */ -static int rm_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (dd->ipath_pkeys[i] != key) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { - dd->ipath_pkeys[i] = 0; - ret = 1; - goto bail; - } - break; - } - - ret = 0; - -bail: - return ret; -} - -/** - * add_pkey - add the given PKEY to the hardware table - * @dd: the infinipath device - * @key: the PKEY - * - * Return an error code if unable to add the entry, zero if no change, - * or 1 if the hardware PKEY register needs to be updated. 
- */ -static int add_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - u16 lkey = key & 0x7FFF; - int any = 0; - int ret; - - if (lkey == 0x7FFF) { - ret = 0; - goto bail; - } - - /* Look for an empty slot or a matching PKEY. */ - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - /* If it matches exactly, try to increment the ref count */ - if (dd->ipath_pkeys[i] == key) { - if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { - ret = 0; - goto bail; - } - /* Lost the race. Look for an empty slot below. */ - atomic_dec(&dd->ipath_pkeyrefs[i]); - any++; - } - /* - * It makes no sense to have both the limited and unlimited - * PKEY set at the same time since the unlimited one will - * disable the limited one. - */ - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ret = -EBUSY; - goto bail; - } - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - dd->ipath_pkeys[i] = key; - ret = 1; - goto bail; - } - } - ret = -EBUSY; - -bail: - return ret; -} - -/** - * ipath_layer_set_pkeys - set the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the PKEY table - */ -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd; - int i; - int changed = 0; - - pd = dd->ipath_pd[0]; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - u16 key = pkeys[i]; - u16 okey = pd->port_pkeys[i]; - - if (key == okey) - continue; - /* - * The value of this PKEY table entry is changing. - * Remove the old entry in the hardware's array of PKEYs. - */ - if (okey & 0x7FFF) - changed |= rm_pkey(dd, okey); - if (key & 0x7FFF) { - int ret = add_pkey(dd, key); - - if (ret < 0) - key = 0; - else - changed |= ret; - } - pd->port_pkeys[i] = key; - } - if (changed) { - u64 pkey; - - pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys); - -/** - * ipath_layer_get_linkdowndefaultstate - get the default linkdown state - * @dd: the infinipath device - * - * Returns zero if the default is POLL, 1 if the default is SLEEP. - */ -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd) -{ - return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate); - -/** - * ipath_layer_set_linkdowndefaultstate - set the default linkdown state - * @dd: the infinipath device - * @sleep: the new state - * - * Note that this will only take effect when the link state changes. 
- */ -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep) -{ - if (sleep) - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - else - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate); - -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold); - -/** - * ipath_layer_set_phyerrthreshold - set the physical error threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold); - -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold); - -/** - * ipath_layer_set_overrunthreshold - set the overrun threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold); - -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - return dd->ipath_f_get_boardname(dd, name, namelen); -} -EXPORT_SYMBOL_GPL(ipath_layer_get_boardname); - -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd) -{ - return dd->ipath_rcvhdrentsize; -} -EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 71485096fca..3854a4eae68 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -40,73 +40,9 @@ */ struct sk_buff; -struct ipath_sge_state; struct ipath_devdata; struct ether_header; -struct ipath_layer_counters { - u64 symbol_error_counter; - u64 link_error_recovery_counter; - u64 link_downed_counter; - u64 port_rcv_errors; - u64 port_rcv_remphys_errors; - u64 port_xmit_discards; - u64 port_xmit_data; - u64 port_rcv_data; - u64 port_xmit_packets; - u64 port_rcv_packets; - u32 local_link_integrity_errors; - u32 excessive_buffer_overrun_errors; -}; - -/* - * A segment is a linear region of low physical memory. 
- * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct ipath_seg { - void *vaddr; - size_t length; -}; - -/* The number of ipath_segs that fit in a page. */ -#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) - -struct ipath_segarray { - struct ipath_seg segs[IPATH_SEGSZ]; -}; - -struct ipath_mregion { - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of ipath_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - struct ipath_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct ipath_sge { - struct ipath_mregion *mr; - void *vaddr; /* current pointer into the segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -struct ipath_sge_state { - struct ipath_sge *sg_list; /* next SGE to be used if any */ - struct ipath_sge sge; /* progress state for the current SGE */ - u8 num_sge; -}; - int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), void (*l_remove)(void *), int (*l_intr)(void *, u32), @@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), struct sk_buff *), u16 rcv_opcode, int (*l_rcv_lid)(void *, void *)); -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail)(void *arg), - void (*l_rcv)(void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb)(void *arg)); void ipath_layer_unregister(void); -void ipath_verbs_unregister(void); int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); u16 ipath_layer_get_lid(struct ipath_devdata *dd); int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); u16 ipath_layer_get_bcast(struct ipath_devdata *dd); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd); -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state); -int ipath_layer_set_mtu(struct ipath_devdata *, u16); -int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr); -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 * hdr, u32 len, struct ipath_sge_state *ss); int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen); -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait); -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs); -int ipath_layer_want_buffer(struct ipath_devdata *dd); -int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid); -__be64 ipath_layer_get_guid(struct ipath_devdata *); -u32 ipath_layer_get_nguid(struct ipath_devdata *); -u32 ipath_layer_get_majrev(struct ipath_devdata *); -u32 ipath_layer_get_minrev(struct ipath_devdata *); -u32 ipath_layer_get_pcirev(struct ipath_devdata *); -u32 ipath_layer_get_flags(struct ipath_devdata *dd); -struct device *ipath_layer_get_device(struct ipath_devdata *dd); -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd); -u32 ipath_layer_get_vendorid(struct ipath_devdata *); -u64 
ipath_layer_get_lastibcstat(struct ipath_devdata *dd); -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd); -int ipath_layer_enable_timer(struct ipath_devdata *dd); -int ipath_layer_disable_timer(struct ipath_devdata *dd); -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags); -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd); -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index); -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd); -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep); -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd); -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n); -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd); -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n); -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); /* ipath_ether interrupt values */ #define IPATH_LAYER_INT_IF_UP 0x2 @@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); #define IPATH_LAYER_INT_SEND_CONTINUE 0x10 #define IPATH_LAYER_INT_BCAST 0x40 -/* _verbs_layer.l_flags */ -#define IPATH_VERBS_KERNEL_SMA 0x1 - extern unsigned ipath_debug; /* debugging bit mask */ #endif /* _IPATH_LAYER_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index d3402341b7d..25908b02fbe 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, struct ipath_devdata *dd = to_idev(ibdev)->dd; u32 vendor, majrev, minrev; - if (smp->attr_mod) + /* GUID 0 is illegal */ + if (smp->attr_mod || (dd->ipath_guid == 0)) smp->status |= IB_SMP_INVALID_FIELD; nip->base_version = 1; @@ -101,15 +102,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = to_idev(ibdev)->sys_image_guid; - nip->node_guid = ipath_layer_get_guid(dd); + nip->node_guid = dd->ipath_guid; nip->port_guid = nip->sys_guid; - nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd)); - nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd)); - majrev = ipath_layer_get_majrev(dd); - minrev = ipath_layer_get_minrev(dd); + nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->ipath_deviceid); + majrev = dd->ipath_majrev; + minrev = dd->ipath_minrev; nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; - vendor = ipath_layer_get_vendorid(dd); + vendor = dd->ipath_vendorid; nip->vendor_id[0] = 0; nip->vendor_id[1] = vendor >> 8; nip->vendor_id[2] = vendor; @@ -131,15 +132,96 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, * We only support one GUID for now. If this changes, the * portinfo.guid_cap field needs to be updated too. */ - if (startgx == 0) - /* The first is a copy of the read-only HW GUID. */ - *p = ipath_layer_get_guid(to_idev(ibdev)->dd); - else + if (startgx == 0) { + __be64 g = to_idev(ibdev)->dd->ipath_guid; + if (g == 0) + /* GUID 0 is illegal */ + smp->status |= IB_SMP_INVALID_FIELD; + else + /* The first is a copy of the read-only HW GUID. 
*/ + *p = g; + } else smp->status |= IB_SMP_INVALID_FIELD; return reply(smp); } + +static int get_overrunthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; +} + +/** + * set_overrunthreshold - set the overrun threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +static int get_phyerrthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @dd: the infinipath device + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct ipath_devdata *dd) +{ + return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); +} + static int recv_subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, u8 port) { @@ -166,7 +248,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, (dev->mkeyprot_resv_lmc >> 6) == 0) pip->mkey = dev->mkey; pip->gid_prefix = dev->gid_prefix; - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; pip->sm_lid = cpu_to_be16(dev->sm_lid); pip->cap_mask = cpu_to_be32(dev->port_cap_flags); @@ -177,14 +259,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->link_width_supported = 3; /* 1x or 4x */ pip->link_width_active = 2; /* 4x */ pip->linkspeed_portstate = 0x10; /* 2.5Gbps */ - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1; pip->portphysstate_linkdown = (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | - (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2); + (get_linkdowndefaultstate(dev->dd) ? 1 : 2); pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -217,7 +299,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->mkey_violations = cpu_to_be16(dev->mkey_violations); /* P_KeyViolations are counted by hardware. 
*/ pip->pkey_violations = - cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) - + cpu_to_be16((ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations) & 0xFFFF); pip->qkey_violations = cpu_to_be16(dev->qkey_violations); /* Only the hardware GUID is supported for now */ @@ -226,8 +308,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, /* 32.768 usec. response time (guessing) */ pip->resv_resptimevalue = 3; pip->localphyerrors_overrunerrors = - (ipath_layer_get_phyerrthreshold(dev->dd) << 4) | - ipath_layer_get_overrunthreshold(dev->dd); + (get_phyerrthreshold(dev->dd) << 4) | + get_overrunthreshold(dev->dd); /* pip->max_credit_hint; */ /* pip->link_roundtrip_latency[3]; */ @@ -237,6 +319,20 @@ bail: return ret; } +/** + * get_pkeys - return the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) +{ + struct ipath_portdata *pd = dd->ipath_pd[0]; + + memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); + + return 0; +} + static int recv_subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -249,9 +345,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp, memset(smp->data, 0, sizeof(smp->data)); if (startpx == 0) { struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); - ipath_layer_get_pkeys(dev->dd, p); + get_pkeys(dev->dd, p); for (i = 0; i < n; i++) q[i] = cpu_to_be16(p[i]); @@ -269,6 +365,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp, } /** + * set_linkdowndefaultstate - set the default linkdown state + * @dd: the infinipath device + * @sleep: the new state + * + * Note that this will only take effect when the link state changes. + */ +static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) +{ + if (sleep) + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + else + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + return 0; +} + +/** * recv_subn_set_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -290,7 +404,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, u8 state; u16 lstate; u32 mtu; - int ret; + int ret, ore; if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) goto err; @@ -304,7 +418,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); lid = be16_to_cpu(pip->lid); - if (lid != ipath_layer_get_lid(dev->dd)) { + if (lid != dev->dd->ipath_lid) { /* Must be a valid unicast LID address. */ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) goto err; @@ -342,11 +456,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case 0: /* NOP */ break; case 1: /* SLEEP */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1)) + if (set_linkdowndefaultstate(dev->dd, 1)) goto err; break; case 2: /* POLL */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0)) + if (set_linkdowndefaultstate(dev->dd, 0)) goto err; break; default: @@ -376,7 +490,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* XXX We have already partially updated our state! */ goto err; } - ipath_layer_set_mtu(dev->dd, mtu); + ipath_set_mtu(dev->dd, mtu); dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; @@ -392,20 +506,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * later. 
*/ if (pip->pkey_violations == 0) - dev->z_pkey_violations = - ipath_layer_get_cr_errpkey(dev->dd); + dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd); if (pip->qkey_violations == 0) dev->qkey_violations = 0; - if (ipath_layer_set_phyerrthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors >> 4) & 0xF)) + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF)) goto err; - if (ipath_layer_set_overrunthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors & 0xF))) + if (set_overrunthreshold(dev->dd, (ore & 0xF))) goto err; dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; @@ -423,7 +533,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * is down or is being set to down. */ state = pip->linkspeed_portstate & 0xF; - flags = ipath_layer_get_flags(dev->dd); + flags = dev->dd->ipath_flags; lstate = (pip->portphysstate_linkdown >> 4) & 0xF; if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) goto err; @@ -439,7 +549,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* FALLTHROUGH */ case IB_PORT_DOWN: if (lstate == 0) - if (ipath_layer_get_linkdowndefaultstate(dev->dd)) + if (get_linkdowndefaultstate(dev->dd)) lstate = IPATH_IB_LINKDOWN_SLEEP; else lstate = IPATH_IB_LINKDOWN; @@ -451,7 +561,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, lstate = IPATH_IB_LINKDOWN_DISABLE; else goto err; - ipath_layer_set_linkstate(dev->dd, lstate); + ipath_set_linkstate(dev->dd, lstate); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -460,7 +570,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ARMED: if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -469,7 +579,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ACTIVE: if (!(flags & IPATH_LINKARMED)) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); event.event = IB_EVENT_PORT_ACTIVE; ib_dispatch_event(&event); break; @@ -493,6 +603,152 @@ done: return ret; } +/** + * rm_pkey - decrecment the reference count for the given PKEY + * @dd: the infinipath device + * @key: the PKEY index + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (dd->ipath_pkeys[i] != key) + continue; + if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { + dd->ipath_pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @dd: the infinipath device + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. 
*/ + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (dd->ipath_pkeys[i] == key) { + if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&dd->ipath_pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i] && + atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { + /* for ipathstats, etc. */ + ipath_stats.sps_pkeys[i] = lkey; + dd->ipath_pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the PKEY table + */ +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +{ + struct ipath_portdata *pd; + int i; + int changed = 0; + + pd = dd->ipath_pd[0]; + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = pd->port_pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(dd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(dd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + pd->port_pkeys[i] = key; + } + if (changed) { + u64 pkey; + + pkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", + (unsigned long long) pkey); + ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, + pkey); + } + return 0; +} + static int recv_subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -500,13 +756,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, __be16 *p = (__be16 *) smp->data; u16 *q = (u16 *) smp->data; struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); for (i = 0; i < n; i++) q[i] = be16_to_cpu(p[i]); - if (startpx != 0 || - ipath_layer_set_pkeys(dev->dd, q) != 0) + if (startpx != 0 || set_pkeys(dev->dd, q) != 0) smp->status |= IB_SMP_INVALID_FIELD; return recv_subn_get_pkeytable(smp, ibdev); @@ -844,10 +1099,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; u8 port_select = p->port_select; - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); /* Adjust counters for any resets done. */ cntrs.symbol_error_counter -= dev->z_symbol_error_counter; @@ -944,8 +1199,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, u64 swords, rwords, spkts, rpkts, xwait; u8 port_select = p->port_select; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); /* Adjust counters for any resets done. 
*/ swords -= dev->z_port_xmit_data; @@ -978,13 +1233,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; /* * Since the HW doesn't support clearing counters, we save the * current count and subtract it from future responses. */ - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) dev->z_symbol_error_counter = cntrs.symbol_error_counter; @@ -1041,8 +1296,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp, struct ipath_ibdev *dev = to_idev(ibdev); u64 swords, rwords, spkts, rpkts, xwait; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) dev->z_port_xmit_data = swords; diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c new file mode 100644 index 00000000000..11b7378ff21 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "ipath_verbs.h" + +/** + * ipath_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct ipath_mmap_info + */ +void ipath_release_mmap_info(struct kref *ref) +{ + struct ipath_mmap_info *ip = + container_of(ref, struct ipath_mmap_info, ref); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. 
+ */ +static void ipath_vma_open(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); + ip->mmap_cnt++; +} + +static void ipath_vma_close(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + ip->mmap_cnt--; + kref_put(&ip->ref, ipath_release_mmap_info); +} + +static struct vm_operations_struct ipath_vm_ops = { + .open = ipath_vma_open, + .close = ipath_vma_close, +}; + +/** + * ipath_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct ipath_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct ipath_mmap_info *ip, **pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (void *) offset != ip->obj) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + *pp = ip->next; + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &ipath_vm_ops; + vma->vm_private_data = ip; + ipath_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 4ac31a5da33..a0673c1eef7 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -36,6 +36,18 @@ #include "ipath_verbs.h" +/* Fast memory region */ +struct ipath_fmr { + struct ib_fmr ibfmr; + u8 page_shift; + struct ipath_mregion mr; /* must be last */ +}; + +static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct ipath_fmr, ibfmr); +} + /** * ipath_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -126,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, goto bail; } + mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; mr->mr.length = 0; @@ -185,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, goto bail; } + mr->mr.pd = pd; mr->mr.user_base = region->user_base; mr->mr.iova = region->virt_base; mr->mr.length = region->length; @@ -277,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, * Resources are allocated but no valid mapping (RKEY can't be * used). 
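Helpers such as to_ifmr() above rely on the container_of() idiom to step from an embedded structure back to its container. A self-contained sketch with simplified stand-in structures:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

struct ib_fmr { int dummy; };		/* stand-in for the real struct */

struct ipath_fmr {
	struct ib_fmr ibfmr;
	unsigned char page_shift;
};

static struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
{
	return container_of(ibfmr, struct ipath_fmr, ibfmr);
}

int main(void)
{
	struct ipath_fmr fmr = { .page_shift = 12 };

	printf("page_shift = %d\n", (int) to_ifmr(&fmr.ibfmr)->page_shift);
	return 0;
}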
*/ + fmr->mr.pd = pd; fmr->mr.user_base = 0; fmr->mr.iova = 0; fmr->mr.length = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 83e557be591..46c1c89bf6a 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -35,7 +35,7 @@ #include <linux/vmalloc.h> #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) @@ -44,19 +44,6 @@ #define find_next_offset(map, off) find_next_zero_bit((map)->page, \ BITS_PER_PAGE, off) -#define TRANS_INVALID 0 -#define TRANS_ANY2RST 1 -#define TRANS_RST2INIT 2 -#define TRANS_INIT2INIT 3 -#define TRANS_INIT2RTR 4 -#define TRANS_RTR2RTS 5 -#define TRANS_RTS2RTS 6 -#define TRANS_SQERR2RTS 7 -#define TRANS_ANY2ERR 8 -#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */ -#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */ -#define TRANS_SQD2RTS 11 /* error if not drained */ - /* * Convert the AETH credit code into the number of credits. */ @@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - _VERBS_INFO("QP memory leak!\n"); + ipath_dbg(KERN_INFO "QP memory leak!\n"); qp = nqp; } } @@ -348,6 +335,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; + qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -355,26 +343,30 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->r_rq.head = 0; - qp->r_rq.tail = 0; + qp->s_wait_credit = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } qp->r_reuse_sge = 0; } /** * ipath_error_qp - put a QP into an error state * @qp: the QP to put into an error state + * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * QP s_lock should be held and interrupts disabled. */ -void ipath_error_qp(struct ipath_qp *qp) +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - _VERBS_INFO("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? 
*/ @@ -384,7 +376,6 @@ void ipath_error_qp(struct ipath_qp *qp) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.status = IB_WC_WR_FLUSH_ERR; wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; @@ -396,6 +387,12 @@ void ipath_error_qp(struct ipath_qp *qp) wc.sl = 0; wc.dlid_path_bits = 0; wc.port_num = 0; + if (qp->r_wrid_valid) { + qp->r_wrid_valid = 0; + wc.status = err; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); @@ -410,15 +407,32 @@ void ipath_error_qp(struct ipath_qp *qp) qp->s_hdrwords = 0; qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - wc.opcode = IB_WC_RECV; - spin_lock(&qp->r_rq.lock); - while (qp->r_rq.tail != qp->r_rq.head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; - if (++qp->r_rq.tail >= qp->r_rq.size) - qp->r_rq.tail = 0; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + if (qp->r_rq.wq) { + struct ipath_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + wc.opcode = IB_WC_RECV; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); } - spin_unlock(&qp->r_rq.lock); } /** @@ -426,11 +440,12 @@ void ipath_error_qp(struct ipath_qp *qp) * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify + * @udata: user data for ipathverbs.so * * Returns 0 on success, otherwise returns an errno. 
*/ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask) + int attr_mask, struct ib_udata *udata) { struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); @@ -448,26 +463,53 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask)) goto inval; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid == 0 || attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) goto inval; + if ((attr->ah_attr.ah_flags & IB_AH_GRH) && + (attr->ah_attr.grh.sgid_index > 1)) + goto inval; + } + if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) + if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; if (attr_mask & IB_QP_MIN_RNR_TIMER) if (attr->min_rnr_timer > 31) goto inval; + if (attr_mask & IB_QP_PORT) + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_PATH_MTU) + if (attr->path_mtu > IB_MTU_4096) + goto inval; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + if (attr->max_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_PATH_MIG_STATE) + if (attr->path_mig_state != IB_MIG_MIGRATED && + attr->path_mig_state != IB_MIG_REARM) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); break; case IB_QPS_ERR: - ipath_error_qp(qp); + ipath_error_qp(qp, IB_WC_GENERAL_ERR); break; default: @@ -482,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn; + qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } @@ -511,6 +553,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; + if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -543,7 +588,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->r_rq.size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; attr->cap.max_inline_data = 0; @@ -557,7 +602,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->max_dest_rd_atomic = 1; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; - attr->timeout = 0; + attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; attr->rnr_retry = qp->s_rnr_retry; attr->alt_port_num = 0; @@ -569,9 +614,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - init_attr->sq_sig_type = - (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) - ? 
IB_SIGNAL_REQ_WR : 0; + if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = qp->ibqp.qp_type; init_attr->port_num = 1; return 0; @@ -596,13 +642,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) } else { u32 min, max, x; u32 credits; - + struct ipath_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; /* * Compute the number of credits available (RWQEs). * XXX Not holding the r_rq.lock here so there is a small * chance that the pair of reads are not atomic. */ - credits = qp->r_rq.head - qp->r_rq.tail; + credits = head - tail; if ((int)credits < 0) credits += qp->r_rq.size; /* @@ -679,27 +735,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - qp = kmalloc(sizeof(*qp), GFP_KERNEL); + sz = sizeof(*qp); + if (init_attr->srq) { + struct ipath_srq *srq = to_isrq(init_attr->srq); + + sz += sizeof(*qp->r_sg_list) * + srq->rq.max_sge; + } else + sz += sizeof(*qp->r_sg_list) * + init_attr->cap.max_recv_sge; + qp = kmalloc(sz, GFP_KERNEL); if (!qp) { - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_swq; } if (init_attr->srq) { + sz = 0; qp->r_rq.size = 0; qp->r_rq.max_sge = 0; qp->r_rq.wq = NULL; + init_attr->cap.max_recv_wr = 0; + init_attr->cap.max_recv_sge = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) + + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); + qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) { - kfree(qp); - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_qp; } } @@ -719,24 +785,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; - qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? - 1 << IPATH_S_SIGNAL_REQ_WR : 0; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + else + qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { - vfree(swq); - vfree(qp->r_rq.wq); - kfree(qp); ret = ERR_PTR(err); - goto bail; + goto bail_rwq; } + qp->ip = NULL; ipath_reset_qp(qp); - - /* Tell the core driver that the kernel SMA is present. */ - if (init_attr->qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, - IPATH_VERBS_KERNEL_SMA); break; default: @@ -747,8 +808,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->cap.max_inline_data = 0; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) qp->r_rq.wq; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_rwq; + } + + if (qp->r_rq.wq) { + /* Allocate info for ipath_mmap(). 
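ipath_compute_aeth() above now reads head and tail from a queue shared with user space, so both indices are clamped before the credit count is formed. The arithmetic, as a stand-alone sketch:

#include <stdint.h>
#include <stdio.h>

static uint32_t rq_credits(uint32_t head, uint32_t tail, uint32_t size)
{
	int32_t credits;

	/* Sanity check the indices before trusting them. */
	if (head >= size)
		head = 0;
	if (tail >= size)
		tail = 0;
	credits = (int32_t) (head - tail);
	if (credits < 0)
		credits += size;
	return (uint32_t) credits;
}

int main(void)
{
	printf("%u\n", rq_credits(3, 10, 16));	/* wrapped case: 9 entries */
	printf("%u\n", rq_credits(10, 3, 16));	/* 7 entries */
	return 0;
}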
*/ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_rwq; + } + qp->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = qp->r_rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_ipath_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + ret = &qp->ibqp; + goto bail; +bail_ip: + kfree(qp->ip); +bail_rwq: + vfree(qp->r_rq.wq); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); bail: return ret; } @@ -768,15 +884,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp) struct ipath_ibdev *dev = to_idev(ibqp->device); unsigned long flags; - /* Tell the core driver that the kernel SMA is gone. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, 0); - - spin_lock_irqsave(&qp->r_rq.lock, flags); - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->s_lock, flags); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); @@ -797,8 +910,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp) if (atomic_read(&qp->refcount) != 0) ipath_free_qp(&dev->qp_table, qp); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); vfree(qp->s_wq); - vfree(qp->r_rq.wq); kfree(qp); return 0; } @@ -850,8 +966,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); + ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 774d1615ce2..a504cf67f27 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x @@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_rnr_timeout) goto done; + /* Limit the number of packets sent without an ACK. */ + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { + qp->s_wait_credit = 1; + dev->n_rc_stalls++; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + goto done; + } + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; bth0 = 0; @@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* Check if send work queue is empty. 
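ipath_create_qp() above unwinds allocation failures through the bail_ip/bail_rwq/bail_qp/bail_swq labels, each freeing only what was already allocated. A compact sketch of the same goto-unwind pattern (the qp_stub/create_stub names are hypothetical):

#include <stdlib.h>

struct qp_stub {
	void *swq;
	void *rwq;
};

static struct qp_stub *create_stub(size_t swq_sz, size_t rwq_sz)
{
	struct qp_stub *qp;
	void *swq;

	swq = malloc(swq_sz);
	if (!swq)
		goto bail;
	qp = malloc(sizeof(*qp));
	if (!qp)
		goto bail_swq;
	qp->rwq = malloc(rwq_sz);
	if (!qp->rwq)
		goto bail_qp;
	qp->swq = swq;
	return qp;

bail_qp:
	free(qp);
bail_swq:
	free(swq);
bail:
	return NULL;
}

int main(void)
{
	struct qp_stub *qp = create_stub(256, 256);

	if (qp) {
		free(qp->rwq);
		free(qp->swq);
		free(qp);
	}
	return 0;
}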
*/ if (qp->s_tail == qp->s_head) goto done; - qp->s_psn = wqe->psn = qp->s_next_psn; + wqe->psn = qp->s_next_psn; newreq = 1; } /* @@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp, */ goto done; } + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + bth2 |= 1 << 31; /* Request ACK. */ qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = ss; @@ -540,7 +542,7 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << @@ -557,7 +559,7 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); + hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); @@ -737,6 +739,15 @@ bail: return; } +static inline void update_last_psn(struct ipath_qp *qp, u32 psn) +{ + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } + qp->s_last_psn = psn; +} + /** * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on @@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * The last valid PSN seen is the previous * request's. */ - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ ipath_restart_rc(qp, wqe->psn, &wc); /* @@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; - qp->s_last_psn = psn; + update_last_psn(qp, psn); ret = 1; goto bail; @@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; /* The last valid PSN is the previous PSN. */ - qp->s_last_psn = psn - 1; + update_last_psn(qp, psn - 1); dev->n_rc_resends += (int)qp->s_psn - (int)psn; @@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 3: /* NAK */ /* The last valid PSN seen is the previous request's. */ if (qp->s_last != qp->s_tail) - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ @@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * since we don't want s_sge modified. 
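The ACK-request change above uses ipath_cmp24() to compare 24-bit PSNs with wrap-around. An equivalent comparison written as portable user-space C (a re-derivation for illustration, not the driver's helper):

#include <stdint.h>
#include <stdio.h>

static int cmp24(uint32_t a, uint32_t b)
{
	uint32_t d = (a - b) & 0xFFFFFF;	/* distance in 24-bit PSN space */

	return (d & 0x800000) ? (int) d - 0x1000000 : (int) d;
}

int main(void)
{
	printf("%d\n", cmp24(0x000005, 0xFFFFFE));	/* 7: a is ahead of b */
	printf("%d\n", cmp24(0x000000, 0x000007));	/* -7: a is behind b */
	return 0;
}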
*/ qp->s_len -= pmtu; - qp->s_last_psn = psn; + update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); ipath_copy_sge(&qp->s_sge, data, pmtu); goto bail; @@ -1223,7 +1234,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * Address range must be a subset of the original * request and start on pmtu boundaries. */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1282,6 +1293,14 @@ done: return 1; } +static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +{ + spin_lock_irq(&qp->s_lock); + qp->state = IB_QPS_ERR; + ipath_error_qp(qp, err); + spin_unlock_irq(&qp->s_lock); +} + /** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on @@ -1309,6 +1328,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -1323,8 +1346,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 4 * bytes of the BTH header (PSN) is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); @@ -1371,8 +1393,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ if (qp->r_ack_state >= OP(COMPARE_SWAP)) goto send_ack; - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; @@ -1478,9 +1499,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_ONLY)) + if (!qp->r_wrid_valid) break; + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; @@ -1518,7 +1539,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->r_sge, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) @@ -1560,7 +1581,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1619,7 +1640,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; @@ -1671,8 +1692,7 @@ nack_acc: * is pending though. 
*/ if (qp->r_ack_state < OP(COMPARE_SWAP)) { - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); qp->r_ack_state = OP(RDMA_WRITE_ONLY); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 89df8f5ea99..dffc76016d3 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -36,8 +36,7 @@ /* * This file should only be included by kernel source, and by the diags. It - * defines the registers, and their contents, for the InfiniPath HT-400 - * chip. + * defines the registers, and their contents, for InfiniPath chips. */ /* @@ -135,10 +134,24 @@ #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL -#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL +/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL +/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL +/* waldo specific -- find the rest in ipath_6110.c */ +#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL +/* monty specific -- find the rest in ipath_6120.c */ +#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL /* kr_hwdiagctrl bits */ #define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL @@ -210,9 +223,9 @@ /* combination link status states that we use with some frequency */ #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \ - << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ + << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \ (INFINIPATH_IBCS_LINKSTATE_MASK \ - <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) + <<INFINIPATH_IBCS_LINKSTATE_SHIFT)) #define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \ << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ (INFINIPATH_IBCS_LT_STATE_LINKUP \ @@ -283,10 +296,12 @@ #define INFINIPATH_XGXS_RESET 0x7ULL #define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 +#define INFINIPATH_XGXS_RX_POL_SHIFT 19 +#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ -/* TID entries (memory), HT400-only */ +/* TID entries (memory), HT-only */ #define INFINIPATH_RT_VALID 0x8000000000000000ULL #define INFINIPATH_RT_ADDR_SHIFT 0 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF @@ -301,6 +316,17 @@ typedef u64 ipath_err_t; +/* The following change with the type of device, so + * need to be part of the ipath_devdata struct, or + * we could have problems plugging in devices of + * different types (e.g. one HT, one PCIE) + * in one system, to be managed by one driver. 
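The new TXE/RXE parity-error definitions above are sub-fields of a 64-bit error word, selected by the matching MASK/SHIFT pair. A sketch of the extraction (the constants are copied from the hunk; the error word here is invented for the example):

#include <stdint.h>
#include <stdio.h>

#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK	0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT	44
#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID	0x04ULL

int main(void)
{
	uint64_t hwerrs = INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID
			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
	uint64_t rxe = (hwerrs >> INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
		       INFINIPATH_HWE_RXEMEMPARITYERR_MASK;

	if (rxe & INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID)
		printf("eager TID array parity error\n");
	return 0;
}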
+ * On the other hand, this file is may also be included + * by other code, so leave the declarations here + * temporarily. Minor footprint issue if common-model + * linker used, none if C89+ linker used. + */ + /* mask of defined bits for various registers */ extern u64 infinipath_i_bitsextant; extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; @@ -309,13 +335,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; /* - * register bits for selecting i2c direction and values, used for I2C serial - * flash - */ -extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num; -extern u64 ipath_gpio_sda, ipath_gpio_scl; - -/* * These are the infinipath general register numbers (not offsets). * The kernel registers are used directly, those beyond the kernel * registers are calculated from one of the base registers. The use of diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 772bc59fb85..f7530512045 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* * Convert the AETH RNR timeout code into the number of milliseconds. @@ -106,6 +106,52 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) spin_unlock_irqrestore(&dev->pending_lock, flags); } +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) +{ + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + qp->r_sge.sge = qp->r_sg_list[0]; + qp->r_sge.sg_list = qp->r_sg_list + 1; + qp->r_sge.num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. 
*/ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + /** * ipath_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP @@ -119,71 +165,72 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) { unsigned long flags; struct ipath_rq *rq; + struct ipath_rwq *wq; struct ipath_srq *srq; struct ipath_rwqe *wqe; - int ret = 1; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; - if (!qp->ibqp.srq) { + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; rq = &qp->r_rq; - spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); - qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - goto done; } - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + do { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + ret = 0; + goto bail; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + } while (!wr_id_only && !init_sge(qp, wqe)); qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq->ibsrq.event_handler) { - struct ib_event ev; + wq->tail = tail; + + ret = 1; + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. 
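ipath_get_rwqe() above now consumes work requests from a ring whose head and tail live in memory mapped to user space, so the tail is validated before use. A minimal user-space model of that consumer:

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint32_t head;		/* written by the producer */
	uint32_t tail;		/* written by the consumer */
	uint32_t size;
	int entries[8];
};

static int ring_pop(struct ring *r, int *out)
{
	uint32_t tail = r->tail;

	if (tail >= r->size)	/* validate before trusting it */
		tail = 0;
	if (tail == r->head)
		return 0;	/* empty */
	*out = r->entries[tail];
	if (++tail >= r->size)
		tail = 0;
	r->tail = tail;
	return 1;
}

int main(void)
{
	struct ring r = { .head = 2, .tail = 0, .size = 8,
			  .entries = { 11, 22 } };
	int v;

	while (ring_pop(&r, &v))
		printf("%d\n", v);
	return 0;
}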
+ */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { + struct ib_event ev; + srq->limit = 0; spin_unlock_irqrestore(&rq->lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); goto bail; } } - -done: spin_unlock_irqrestore(&rq->lock, flags); + qp->r_wrid_valid = 1; + bail: return ret; } @@ -278,7 +325,7 @@ again: case IB_WR_RDMA_WRITE: if (wqe->length == 0) break; - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) { @@ -302,7 +349,7 @@ again: break; case IB_WR_RDMA_READ: - if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) @@ -317,7 +364,7 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_ATOMIC))) @@ -422,6 +469,15 @@ done: wake_up(&qp->wait); } +static int want_buffer(struct ipath_devdata *dd) +{ + set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + + return 0; +} + /** * ipath_no_bufs_available - tell the layer driver we need buffers * @qp: the QP that caused the problem @@ -438,7 +494,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) list_add_tail(&qp->piowait, &dev->piowait); spin_unlock_irqrestore(&dev->pending_lock, flags); /* - * Note that as soon as ipath_layer_want_buffer() is called and + * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() * could be called. If we are still in the tasklet function, * tasklet_hi_schedule() will not call us until the next time @@ -448,7 +504,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) */ clear_bit(IPATH_S_BUSY, &qp->s_flags); tasklet_unlock(&qp->s_task); - ipath_layer_want_buffer(dev->dd); + want_buffer(dev->dd); dev->n_piowait++; } @@ -518,8 +574,7 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) } if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], + if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i], acc)) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; @@ -563,7 +618,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = dev->gid_prefix; - hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd); + hdr->sgid.global.interface_id = dev->dd->ipath_guid; hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. 
*/ @@ -595,8 +650,7 @@ void ipath_do_ruc_send(unsigned long data) if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) goto bail; - if (unlikely(qp->remote_ah_attr.dlid == - ipath_layer_get_lid(dev->dd))) { + if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { ipath_ruc_loopback(qp); goto clear; } @@ -663,8 +717,8 @@ again: qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); - bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index f760434660b..94033503400 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_ibdev *dev = to_idev(ibsrq->device); + struct ipath_rwq *wq; unsigned long flags; int ret; for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > srq->rq.max_sge) { + if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&srq->rq.lock, flags); - next = srq->rq.head + 1; + wq = srq->rq.wq; + next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == srq->rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); + wqe = get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(srq->ibsrq.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok(&dev->lk_table, - &wqe->sg_list[j], - &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - srq->rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } ret = 0; @@ -131,55 +104,99 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; - if (dev->n_srqs_allocated == ib_ipath_max_srqs) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } srq = kmalloc(sizeof(*srq), GFP_KERNEL); if (!srq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* * Need to use vmalloc() if we want to support large #s of entries. 
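ipath_post_srq_receive() above treats the slot after head colliding with tail as a full queue, so a ring of size N holds at most N - 1 entries. The producer side, as a small sketch:

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint32_t head;	/* written by the producer (user space here) */
	uint32_t tail;	/* written by the consumer */
	uint32_t size;
	int entries[8];
};

static int ring_push(struct ring *r, int v)
{
	uint32_t next = r->head + 1;

	if (next >= r->size)
		next = 0;
	if (next == r->tail)
		return -1;	/* full */
	r->entries[r->head] = v;
	r->head = next;		/* publish only after the entry is written */
	return 0;
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0, .size = 8 };
	int i;

	for (i = 0; i < 10; i++) {
		if (ring_push(&r, i) < 0) {
			printf("queue full at %d\n", i);	/* i == 7 */
			break;
		}
	}
	return 0;
}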
*/ srq->rq.size = srq_init_attr->attr.max_wr + 1; - sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge + + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct ipath_rwqe); - srq->rq.wq = vmalloc(srq->rq.size * sz); + srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { - kfree(srq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_srq; } /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) srq->rq.wq; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wq; + } + + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + srq->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = srq->rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + srq->rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + srq->ip = NULL; + + /* * ib_create_srq() will initialize srq->ibsrq. */ spin_lock_init(&srq->rq.lock); - srq->rq.head = 0; - srq->rq.tail = 0; - srq->rq.max_sge = srq_init_attr->attr.max_sge; + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; srq->limit = srq_init_attr->attr.srq_limit; + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_ipath_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + ret = &srq->ibsrq; + goto done; - dev->n_srqs_allocated++; +bail_wq: + vfree(srq->rq.wq); -bail: +bail_srq: + kfree(srq); + +done: return ret; } @@ -188,83 +205,130 @@ bail: * @ibsrq: the SRQ to modify * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify + * @udata: user data for ipathverbs.so */ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); - unsigned long flags; - int ret; + int ret = 0; - if (attr_mask & IB_SRQ_MAX_WR) - if ((attr->max_wr > ib_ipath_max_srq_wrs) || - (attr->max_sge > srq->rq.max_sge)) { - ret = -EINVAL; - goto bail; - } + if (attr_mask & IB_SRQ_MAX_WR) { + struct ipath_rwq *owq; + struct ipath_rwq *wq; + struct ipath_rwqe *p; + u32 sz, size, n, head, tail; - if (attr_mask & IB_SRQ_LIMIT) - if (attr->srq_limit >= srq->rq.size) { + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_ipath_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { ret = -EINVAL; goto bail; } - if (attr_mask & IB_SRQ_MAX_WR) { - struct ipath_rwqe *wq, *p; - u32 sz, size, n; - sz = sizeof(struct ipath_rwqe) + - attr->max_sge * sizeof(struct ipath_sge); + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = vmalloc(size * sz); + wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); if (!wq) { ret = -ENOMEM; goto bail; } - spin_lock_irqsave(&srq->rq.lock, flags); - if (srq->rq.head < srq->rq.tail) - n = srq->rq.size + srq->rq.head - srq->rq.tail; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. 
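The SRQ allocation above sizes the shared buffer as the queue header plus (max_wr + 1) entries of (WQE header + max_sge SGEs), rounded up to a page for mmap(). A sketch of that arithmetic with stand-in structure sizes:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long wqe_hdr = 24;	/* stand-in for sizeof(struct ipath_rwqe) */
	unsigned long sge = 16;		/* stand-in for sizeof(struct ib_sge) */
	unsigned long rwq_hdr = 8;	/* stand-in for sizeof(struct ipath_rwq) */
	unsigned long max_wr = 128, max_sge = 4;
	unsigned long sz = wqe_hdr + max_sge * sge;
	unsigned long bytes = PAGE_ALIGN(rwq_hdr + (max_wr + 1) * sz);

	printf("mmap size: %lu bytes\n", bytes);
	return 0;
}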
+ */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = (__u64) wq; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) { + vfree(wq); + goto bail; + } + udata->outbuf = (void __user *) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) { + vfree(wq); + goto bail; + } + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + if (head >= srq->rq.size) + head = 0; + tail = owq->tail; + if (tail >= srq->rq.size) + tail = 0; + n = head; + if (n < tail) + n += srq->rq.size - tail; else - n = srq->rq.head - srq->rq.tail; - if (size <= n || size <= srq->limit) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + n -= tail; + if (size <= n) { + spin_unlock_irq(&srq->rq.lock); vfree(wq); ret = -EINVAL; goto bail; } n = 0; - p = wq; - while (srq->rq.tail != srq->rq.head) { + p = wq->wq; + while (tail != head) { struct ipath_rwqe *wqe; int i; - wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail); + wqe = get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; - p->length = wqe->length; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; p = (struct ipath_rwqe *)((char *) p + sz); - if (++srq->rq.tail >= srq->rq.size) - srq->rq.tail = 0; + if (++tail >= srq->rq.size) + tail = 0; } - vfree(srq->rq.wq); srq->rq.wq = wq; srq->rq.size = size; - srq->rq.head = n; - srq->rq.tail = 0; - srq->rq.max_sge = attr->max_sge; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - - if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irqsave(&srq->rq.lock, flags); - srq->limit = attr->srq_limit; - spin_unlock_irqrestore(&srq->rq.lock, flags); + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct ipath_mmap_info *ip = srq->ip; + struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + + ip->obj = wq; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); } - ret = 0; bail: return ret; @@ -289,8 +353,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq) struct ipath_srq *srq = to_isrq(ibsrq); struct ipath_ibdev *dev = to_idev(ibsrq->device); + spin_lock(&dev->n_srqs_lock); dev->n_srqs_allocated--; - vfree(srq->rq.wq); + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, ipath_release_mmap_info); + else + vfree(srq->rq.wq); kfree(srq); return 0; diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 70351b7e35c..30a825928fc 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque) } } - if (dd->ipath_nosma_bufs) { - dd->ipath_nosma_secs += 5; - if (dd->ipath_nosma_secs >= 30) { - ipath_cdbg(SMA, "No SMA bufs avail %u seconds; " - "cancelling pending sends\n", - dd->ipath_nosma_secs); - /* - * issue an abort as well, in case we have a packet - * stuck in launch fifo. 
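The IB_SRQ_MAX_WR path above grows the queue by copying the outstanding entries from the old ring (tail through head) into the new ring starting at slot 0, then setting head = n and tail = 0. The copy, as a stand-alone sketch:

#include <stdio.h>

static unsigned resize_ring(const int *oldq, unsigned old_size,
			    unsigned head, unsigned tail,
			    int *newq, unsigned new_size)
{
	unsigned n = 0;

	while (tail != head && n < new_size - 1) {
		newq[n++] = oldq[tail];
		if (++tail >= old_size)
			tail = 0;
	}
	return n;	/* new head; the new tail starts at 0 */
}

int main(void)
{
	int oldq[4] = { 10, 20, 30, 40 }, newq[8];
	unsigned head;

	head = resize_ring(oldq, 4, 1 /* head */, 2 /* tail */, newq, 8);
	printf("copied %u entries, first = %d\n", head, newq[0]);	/* 3, 30 */
	return 0;
}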
This could corrupt an - * outgoing user packet in the worst case, - * but this is a pretty catastrophic, anyway. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k - - dd->ipath_lastport_piobuf); - /* start again, if necessary */ - dd->ipath_nosma_secs = 0; - } else - ipath_cdbg(SMA, "No SMA bufs avail %u tries, " - "after %u seconds\n", - dd->ipath_nosma_bufs, - dd->ipath_nosma_secs); - } - done: mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index b98821d7801..182de34f9f4 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -35,7 +35,6 @@ #include <linux/pci.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" /** @@ -76,7 +75,7 @@ bail: static ssize_t show_version(struct device_driver *dev, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version); + return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); } static ssize_t show_num_units(struct device_driver *dev, char *buf) @@ -108,8 +107,8 @@ static const char *ipath_status_str[] = { "Initted", "Disabled", "Admin_Disabled", - "OIB_SMA", - "SMA", + "", /* This used to be the old "OIB_SMA" status. */ + "", /* This used to be the old "SMA" status. */ "Present", "IB_link_up", "IB_configured", @@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev, unit = dd->ipath_unit; dd->ipath_mlid = mlid; - ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST); goto bail; invalid: @@ -259,7 +257,7 @@ static ssize_t store_guid(struct device *dev, struct ipath_devdata *dd = dev_get_drvdata(dev); ssize_t ret; unsigned short guid[8]; - __be64 nguid; + __be64 new_guid; u8 *ng; int i; @@ -268,7 +266,7 @@ static ssize_t store_guid(struct device *dev, &guid[4], &guid[5], &guid[6], &guid[7]) != 8) goto invalid; - ng = (u8 *) &nguid; + ng = (u8 *) &new_guid; for (i = 0; i < 8; i++) { if (guid[i] > 0xff) @@ -276,7 +274,10 @@ static ssize_t store_guid(struct device *dev, ng[i] = guid[i]; } - dd->ipath_guid = nguid; + if (new_guid == 0) + goto invalid; + + dd->ipath_guid = new_guid; dd->ipath_nguid = 1; ret = strlen(buf); @@ -299,6 +300,16 @@ static ssize_t show_nguid(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); } +static ssize_t show_nports(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + /* Return the number of user ports available. 
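store_guid() above parses eight colon-separated hex bytes and now rejects an all-zero GUID. A user-space sketch of the same parsing and checks (byte-order handling of the __be64 is omitted):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const char *buf = "00:11:22:33:44:55:66:77";
	unsigned short g[8];
	uint64_t guid = 0;
	int i;

	if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
		   &g[0], &g[1], &g[2], &g[3],
		   &g[4], &g[5], &g[6], &g[7]) != 8)
		return 1;
	for (i = 0; i < 8; i++) {
		if (g[i] > 0xff)	/* each field must fit in a byte */
			return 1;
		guid = (guid << 8) | g[i];
	}
	if (guid == 0)	/* the patch now rejects an all-zero GUID */
		return 1;
	printf("guid = 0x%016llx\n", (unsigned long long) guid);
	return 0;
}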
*/ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); +} + static ssize_t show_serial(struct device *dev, struct device_attribute *attr, char *buf) @@ -467,7 +478,7 @@ static ssize_t store_link_state(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_linkstate(dd, state); + r = ipath_set_linkstate(dd, state); if (r < 0) { ret = r; goto bail; @@ -502,7 +513,7 @@ static ssize_t store_mtu(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_mtu(dd, mtu); + r = ipath_set_mtu(dd, mtu); if (r < 0) ret = r; @@ -563,6 +574,33 @@ bail: return ret; } +static ssize_t store_rx_pol_inv(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0) + goto invalid; + + r = ipath_set_rx_pol_inv(dd, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); +bail: + return ret; +} + + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -583,12 +621,14 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); +static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); +static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -597,12 +637,14 @@ static struct attribute *dev_attributes[] = { &dev_attr_mlid.attr, &dev_attr_mtu.attr, &dev_attr_nguid.attr, + &dev_attr_nports.attr, &dev_attr_serial.attr, &dev_attr_status.attr, &dev_attr_status_str.attr, &dev_attr_boardversion.attr, &dev_attr_unit.attr, &dev_attr_enabled.attr, + &dev_attr_rx_pol_inv.attr, NULL }; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index c33abea2d5a..e636cfd67a8 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x @@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -261,8 +265,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * size to 56 bytes so the last 4 bytes of * the BTH header (PSN) is in the data buffer. 
*/ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); @@ -441,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey */ - ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) { diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 3466129af80..49f1102af8b 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -34,7 +34,52 @@ #include <rdma/ib_smi.h> #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" + +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss) +{ + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + *lengthp = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + *lengthp += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} /** * ipath_ud_loopback - handle send on loopback QPs @@ -46,6 +91,8 @@ * * This is called from ipath_post_ud_send() to forward a WQE addressed * to the same HCA. + * Note that the receive interrupt handler may be calling ipath_ud_rcv() + * while this is being called. */ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss, @@ -60,7 +107,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_srq *srq; struct ipath_sge_state rsge; struct ipath_sge *sge; + struct ipath_rwq *wq; struct ipath_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + u32 rlen; qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); if (!qp) @@ -94,6 +145,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->imm_data = 0; } + if (wr->num_sge > 1) { + rsge.sg_list = kmalloc((wr->num_sge - 1) * + sizeof(struct ipath_sge), + GFP_ATOMIC); + } else + rsge.sg_list = NULL; + /* * Get the next work request entry to find where to put the data. 
* Note that it is safe to drop the lock after changing rq->tail @@ -101,37 +159,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, */ if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { srq = NULL; + handler = NULL; rq = &qp->r_rq; } + spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto done; + wq = rq->wq; + tail = wq->tail; + while (1) { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto bail_sge; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + if (init_sge(qp, wqe, &rlen, &rsge)) + break; + wq->tail = tail; } /* Silently drop packets which are too big. */ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc->byte_len > wqe->length) { + if (wc->byte_len > rlen) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; - goto done; + goto bail_sge; } + wq->tail = tail; wc->wr_id = wqe->wr_id; - rsge.sge = wqe->sg_list[0]; - rsge.sg_list = wqe->sg_list + 1; - rsge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { struct ib_event ev; @@ -140,12 +213,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); } else spin_unlock_irqrestore(&rq->lock, flags); } else spin_unlock_irqrestore(&rq->lock, flags); + ah_attr = &to_iah(wr->wr.ud.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); @@ -186,7 +259,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ wc->pkey_index = 0; - wc->slid = ipath_layer_get_lid(dev->dd) | + wc->slid = dev->dd->ipath_lid | (ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); wc->sl = ah_attr->sl; @@ -196,6 +269,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, wr->send_flags & IB_SEND_SOLICITED); +bail_sge: + kfree(rsge.sg_list); done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -266,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ? + if (!ipath_lkey_ok(qp, ss.num_sge ? sg_list + ss.num_sge - 1 : &ss.sge, &wr->sg_list[i], 0)) { ret = -EINVAL; @@ -276,7 +351,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) ss.num_sge++; } /* Check for invalid packet size. 
*/ - if (len > ipath_layer_get_ibmtu(dev->dd)) { + if (len > dev->dd->ipath_ibmtu) { ret = -EINVAL; goto bail; } @@ -298,7 +373,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) dev->n_unicast_xmit++; lid = ah_attr->dlid & ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid == ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid == dev->dd->ipath_lid)) { /* * Pass in an uninitialized ib_wc to save stack * space. @@ -327,7 +402,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix; qp->s_hdr.u.l.grh.sgid.global.interface_id = - ipath_layer_get_guid(dev->dd); + dev->dd->ipath_guid; qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid; /* * Don't worry about sending to locally attached multicast @@ -357,7 +432,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); @@ -368,7 +443,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) bth0 |= 1 << 23; bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + ipath_get_pkey(dev->dd, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -433,13 +508,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int opcode; u32 hdrsize; u32 pad; - unsigned long flags; struct ib_wc wc; u32 qkey; u32 src_qp; - struct ipath_rq *rq; - struct ipath_srq *srq; - struct ipath_rwqe *wqe; u16 dlid; int header_in_data; @@ -458,8 +529,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 12 * bytes of the IB header is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { qkey = be32_to_cpu(((__be32 *) data)[1]); src_qp = be32_to_cpu(((__be32 *) data)[2]); @@ -547,19 +617,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. - * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. */ - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; - } else { - srq = NULL; - rq = &qp->r_rq; - } - spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); + if (qp->r_reuse_sge) + qp->r_reuse_sge = 0; + else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. * Otherwise, count them as buffer overruns since usually, @@ -573,39 +634,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto bail; } /* Silently drop packets which are too big. 
*/ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc.byte_len > wqe->length) { - spin_unlock_irqrestore(&rq->lock, flags); + if (wc.byte_len > qp->r_len) { + qp->r_reuse_sge = 1; dev->n_pkt_drops++; goto bail; } - wc.wr_id = wqe->wr_id; - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { - u32 n; - - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; - else - n = rq->head - rq->tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); - } else - spin_unlock_irqrestore(&rq->lock, flags); - } else - spin_unlock_irqrestore(&rq->lock, flags); if (has_grh) { ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh)); @@ -614,6 +647,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index e32fca9faf8..413754b1d8a 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -90,6 +90,62 @@ bail: } /** + * ipath_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. + * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * ipath_map_single - a safety wrapper around pci_map_single() + * + * Same idea as ipath_map_page(). + */ +dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, + int direction) +{ + dma_addr_t phys; + + phys = pci_map_single(hwdev, ptr, size, direction); + + if (phys == 0) { + pci_unmap_single(hwdev, phys, size, direction); + phys = pci_map_single(hwdev, ptr, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. 
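[Editor's note] A brief usage sketch for ipath_map_page() above; the surrounding variables (dd, page) are assumed to be in scope and the context is illustrative only, not one of the driver's real call sites:

        /*
         * Map one page of a receive buffer for DMA from the device.
         * ipath_map_page() retries once if pci_map_page() returns 0,
         * because the chip interprets a 0 dma_addr as a disabled entry
         * rather than a real buffer address.
         */
        dma_addr_t addr = ipath_map_page(dd->pcidev, page, 0, PAGE_SIZE,
                                         PCI_DMA_FROMDEVICE);

As the FIXME notes, the retry is not bulletproof: a second zero mapping is still handed back, so callers must still avoid programming 0 into the hardware.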
+ */ + } + + return phys; +} + +/** * ipath_get_user_pages - lock user pages into memory * @start_page: the start page * @num_pages: the number of pages diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index d70a9b6b523..42eaed88c28 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -33,15 +33,13 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_verbs.h> +#include <linux/io.h> #include <linux/utsname.h> #include "ipath_kernel.h" #include "ipath_verbs.h" #include "ipath_common.h" -/* Not static, because we don't want the compiler removing it */ -const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR; - static unsigned int ib_ipath_qp_table_size = 251; module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, MODULE_PARM_DESC(lkey_table_size, "LKEY table size in bits (2^n, 1 <= n <= 23)"); -unsigned int ib_ipath_debug; /* debug mask */ -module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "Verbs debug mask"); - static unsigned int ib_ipath_max_pds = 0xFFFF; module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_pds, @@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); +unsigned int ib_ipath_max_qps = 16384; +module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + unsigned int ib_ipath_max_sges = 0x60; module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); @@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic <support@pathscale.com>"); -MODULE_DESCRIPTION("QLogic InfiniPath driver"); +static unsigned int ib_ipath_disable_sma; +module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA"); const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, @@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_ERR] = 0, }; +struct ipath_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct ipath_ucontext, ibucontext); +} + /* * Translate ib_wr_opcode into ib_wc_opcode. */ @@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; /* Check that state is OK to post receive. 
*/ - if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { *bad_wr = wr; ret = -EINVAL; goto bail; @@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > qp->r_rq.max_sge) { + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&qp->r_rq.lock, flags); - next = qp->r_rq.head + 1; + next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == qp->r_rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&qp->r_rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head); + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(qp->ibqp.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok( - &to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - qp->r_rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&qp->r_rq.lock, flags); } ret = 0; @@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: + if (ib_ipath_disable_sma) + break; + /* FALLTHROUGH */ case IB_QPT_UD: ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); break; @@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, } /** - * ipath_ib_rcv - process and incoming packet + * ipath_ib_rcv - process an incoming packet * @arg: the device pointer * @rhdr: the header of the packet * @data: the packet data @@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, * This is called from ipath_kreceive() to process an incoming packet at * interrupt level. Tlen is the length of the header + data + CRC in bytes. */ -static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) +void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, + u32 tlen) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_ib_header *hdr = rhdr; struct ipath_other_headers *ohdr; struct ipath_qp *qp; @@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) lid = be16_to_cpu(hdr->lrh[1]); if (lid < IPATH_MULTICAST_LID_BASE) { lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid != ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid != dev->dd->ipath_lid)) { dev->rcv_errors++; goto bail; } @@ -495,9 +479,8 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. 
*/ -static void ipath_ib_timer(void *arg) +void ipath_ib_timer(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *resend = NULL; struct list_head *last; struct ipath_qp *qp; @@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg) if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && --dev->pma_sample_start == 0) { dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; - ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword, - &dev->ipath_rword, - &dev->ipath_spkts, - &dev->ipath_rpkts, - &dev->ipath_xmit_wait); + ipath_snapshot_counters(dev->dd, &dev->ipath_sword, + &dev->ipath_rword, + &dev->ipath_spkts, + &dev->ipath_rpkts, + &dev->ipath_xmit_wait); } if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { if (dev->pma_sample_interval == 0) { u64 ta, tb, tc, td, te; dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; - ipath_layer_snapshot_counters(dev->dd, &ta, &tb, - &tc, &td, &te); + ipath_snapshot_counters(dev->dd, &ta, &tb, + &tc, &td, &te); dev->ipath_sword = ta - dev->ipath_sword; dev->ipath_rword = tb - dev->ipath_rword; @@ -581,6 +564,365 @@ static void ipath_ib_timer(void *arg) } } +static void update_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, + u32 length) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + BUG_ON(len == 0); + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. 
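[Editor's note] A worked example may help with the byte-lane helpers above (little-endian case, purely illustrative): suppose one byte is already buffered in the accumulator (extra == 1) and the next source fragment contributes two more bytes (len == 2). clear_upper_bytes(v, 2, 1) computes v <<= 16 followed by v >>= 8, which moves the two payload bytes into byte lanes 1 and 2 and zeroes lanes 0 and 3; OR-ing the result into 'data' leaves lane 0 holding the previously buffered byte and lane 3 free for the next fragment, so extra becomes 3.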
*/ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. + */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, + extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + __iowrite32_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + ipath_flush_wc(); +} + +/** + * ipath_verbs_send - send a packet + * @dd: the infinipath device + * @hdrwords: the number of words in the header + * @hdr: the packet header + * @len: the length of the packet in bytes + * @ss: the SGE to send + */ +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss) +{ + u32 __iomem *piobuf; + u32 plen; + int ret; + + /* +1 is for the qword padding of pbc */ + plen = hdrwords + ((len + 3) >> 2) + 1; + if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { + ipath_dbg("packet len 0x%x too long, failing\n", plen); + ret = -EINVAL; + goto bail; + } + + /* Get a PIO buffer to use. */ + piobuf = ipath_getpiobuf(dd, NULL); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto bail; + } + + /* + * Write len to control qword, no flags. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(plen, piobuf); + ipath_flush_wc(); + piobuf += 2; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + __iowrite32_copy(piobuf, hdr, hdrwords - 1); + ipath_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + ipath_flush_wc(); + ret = 0; + goto bail; + } + + __iowrite32_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 w; + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + /* Need to round up for the last dword in the packet. 
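[Editor's note] The length check at the top of ipath_verbs_send() is easier to read with a concrete, purely illustrative example: a 17-dword header with 256 bytes of payload gives plen = 17 + ((256 + 3) >> 2) + 1 = 17 + 64 + 1 = 82 dwords, the final + 1 being the PBC padding noted in the comment; plen << 2 = 328 bytes is what gets compared against dd->ipath_ibmaxlen before a PIO buffer is even requested.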
*/ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, addr, w - 1); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(addr[w - 1], piobuf + w - 1); + /* be sure trigger word is written */ + ipath_flush_wc(); + ret = 0; + goto bail; + } + copy_io(piobuf, ss, len); + ret = 0; + +bail: + return ret; +} + +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_get_counters - get various chip counters + * @dd: the infinipath device + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + cntrs->link_error_recovery_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ + cntrs->link_downed_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); + cntrs->port_rcv_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) + + dd->ipath_rxfc_unsupvl_errs; + cntrs->port_rcv_remphys_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); + cntrs->port_xmit_discards = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); + cntrs->port_xmit_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + cntrs->port_rcv_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + cntrs->port_xmit_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + cntrs->port_rcv_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs; + + ret = 0; + +bail: + return ret; +} + /** * ipath_ib_piobufavail - callback when a PIO buffer is available * @arg: the device pointer @@ -591,9 +933,8 @@ static void ipath_ib_timer(void *arg) * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and * return zero). 
*/ -static int ipath_ib_piobufavail(void *arg) +int ipath_ib_piobufavail(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *qp; unsigned long flags; @@ -624,14 +965,14 @@ static int ipath_query_device(struct ib_device *ibdev, IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID; props->page_size_cap = PAGE_SIZE; - props->vendor_id = ipath_layer_get_vendorid(dev->dd); - props->vendor_part_id = ipath_layer_get_deviceid(dev->dd); - props->hw_ver = ipath_layer_get_pcirev(dev->dd); + props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_part_id = dev->dd->ipath_deviceid; + props->hw_ver = dev->dd->ipath_pcirev; props->sys_image_guid = dev->sys_image_guid; props->max_mr_size = ~0ull; - props->max_qp = dev->qp_table.max; + props->max_qp = ib_ipath_max_qps; props->max_qp_wr = ib_ipath_max_qp_wrs; props->max_sge = ib_ipath_max_sges; props->max_cq = ib_ipath_max_cqs; @@ -647,7 +988,7 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ props->atomic_cap = IB_ATOMIC_HCA; - props->max_pkeys = ipath_layer_get_npkeys(dev->dd); + props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * @@ -672,12 +1013,17 @@ const u8 ipath_cvt_physportstate[16] = { [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, }; +u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) +{ + return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); +} + static int ipath_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct ipath_ibdev *dev = to_idev(ibdev); enum ib_mtu mtu; - u16 lid = ipath_layer_get_lid(dev->dd); + u16 lid = dev->dd->ipath_lid; u64 ibcstat; memset(props, 0, sizeof(*props)); @@ -685,16 +1031,16 @@ static int ipath_query_port(struct ib_device *ibdev, props->lmc = dev->mkeyprot_resv_lmc & 7; props->sm_lid = dev->sm_lid; props->sm_sl = dev->sm_sl; - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; props->state = ((ibcstat >> 4) & 0x3) + 1; /* See phys_state_show() */ props->phys_state = ipath_cvt_physportstate[ - ipath_layer_get_lastibcstat(dev->dd) & 0xf]; + dev->dd->ipath_lastibcstat & 0xf]; props->port_cap_flags = dev->port_cap_flags; props->gid_tbl_len = 1; props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd); - props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) - + props->pkey_tbl_len = ipath_get_npkeys(dev->dd); + props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations; props->qkey_viol_cntr = dev->qkey_violations; props->active_width = IB_WIDTH_4X; @@ -704,7 +1050,7 @@ static int ipath_query_port(struct ib_device *ibdev, props->init_type_reply = 0; props->max_mtu = IB_MTU_4096; - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -763,7 +1109,7 @@ static int ipath_modify_port(struct ib_device *ibdev, dev->port_cap_flags |= props->set_port_cap_mask; dev->port_cap_flags &= ~props->clr_port_cap_mask; if (port_modify_mask & IB_PORT_SHUTDOWN) - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) dev->qkey_violations = 0; return 0; @@ -780,7 +1126,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port, goto 
bail; } gid->global.subnet_prefix = dev->gid_prefix; - gid->global.interface_id = ipath_layer_get_guid(dev->dd); + gid->global.interface_id = dev->dd->ipath_guid; ret = 0; @@ -803,18 +1149,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, * we allow allocations of more than we report for this value. */ - if (dev->n_pds_allocated == ib_ipath_max_pds) { + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { ret = ERR_PTR(-ENOMEM); goto bail; } - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) { + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_ipath_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); ret = ERR_PTR(-ENOMEM); goto bail; } dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); /* ib_alloc_pd() will initialize pd->ibpd. */ pd->user = udata != NULL; @@ -830,7 +1180,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd) struct ipath_pd *pd = to_ipd(ibpd); struct ipath_ibdev *dev = to_idev(ibpd->device); + spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); kfree(pd); @@ -850,11 +1202,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, struct ipath_ah *ah; struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); - - if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } + unsigned long flags; /* A multicast address requires a GRH (see ch. 8.4.1). */ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && @@ -881,7 +1229,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, goto bail; } + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == ib_ipath_max_ahs) { + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); /* ib_create_ah() will initialize ah->ibah. 
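[Editor's note] The PD and AH paths above both switch to the same pattern: perform the allocation first, then take a short spinlock only to test and bump the per-device counter, undoing the allocation if the limit was hit. A generic sketch of the pattern; dev->n_objs_lock, n_objs_allocated and max_objs are placeholders, not driver fields:

        obj = kmalloc(sizeof(*obj), GFP_KERNEL);
        if (!obj)
                return ERR_PTR(-ENOMEM);

        spin_lock(&dev->n_objs_lock);
        if (dev->n_objs_allocated == max_objs) {
                spin_unlock(&dev->n_objs_lock);
                kfree(obj);
                return ERR_PTR(-ENOMEM);
        }
        dev->n_objs_allocated++;
        spin_unlock(&dev->n_objs_lock);

Compared with the old code, the limit check is now serialized by its own lock and no allocation ever happens with that lock held.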
*/ ah->attr = *ah_attr; @@ -902,8 +1259,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah) { struct ipath_ibdev *dev = to_idev(ibah->device); struct ipath_ah *ah = to_iah(ibah); + unsigned long flags; + spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); kfree(ah); @@ -919,25 +1279,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * ipath_get_npkeys - return the size of the PKEY table for port 0 + * @dd: the infinipath device + */ +unsigned ipath_get_npkeys(struct ipath_devdata *dd) +{ + return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); +} + +/** + * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * @dd: the infinipath device + * @index: the PKEY index + */ +unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) +{ + unsigned ret; + + if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) + ret = 0; + else + ret = dd->ipath_pd[0]->port_pkeys[index]; + + return ret; +} + static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { struct ipath_ibdev *dev = to_idev(ibdev); int ret; - if (index >= ipath_layer_get_npkeys(dev->dd)) { + if (index >= ipath_get_npkeys(dev->dd)) { ret = -EINVAL; goto bail; } - *pkey = ipath_layer_get_pkey(dev->dd, index); + *pkey = ipath_get_pkey(dev->dd, index); ret = 0; bail: return ret; } - /** * ipath_alloc_ucontext - allocate a ucontest * @ibdev: the infiniband device @@ -970,26 +1355,102 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context) static int ipath_verbs_register_sysfs(struct ib_device *dev); +static void __verbs_timer(unsigned long arg) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) arg; + + /* + * If port 0 receive packet interrupts are not available, or + * can be missed, poll the receive queue + */ + if (dd->ipath_flags & IPATH_POLL_RX_INTR) + ipath_kreceive(dd); + + /* Handle verbs layer timeouts. */ + ipath_ib_timer(dd->verbs_dev); + + mod_timer(&dd->verbs_timer, jiffies + 1); +} + +static int enable_timer(struct ipath_devdata *dd) +{ + /* + * Early chips had a design flaw where the chip and kernel idea + * of the tail register don't always agree, and therefore we won't + * get an interrupt on the next packet received. + * If the board supports per packet receive interrupts, use it. + * Otherwise, the timer function periodically checks for packets + * to cover this case. + * Either way, the timer is needed for verbs layer related + * processing. + */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; + ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, + 0x2074076542310ULL); + /* Enable GPIO bit 2 interrupt */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + } + + init_timer(&dd->verbs_timer); + dd->verbs_timer.function = __verbs_timer; + dd->verbs_timer.data = (unsigned long)dd; + dd->verbs_timer.expires = jiffies + 1; + add_timer(&dd->verbs_timer); + + return 0; +} + +static int disable_timer(struct ipath_devdata *dd) +{ + /* Disable GPIO bit 2 interrupt */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; + /* Disable GPIO bit 2 interrupt */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + /* + * We might want to undo changes to debugportselect, + * but how? 
+ */ + } + + del_timer_sync(&dd->verbs_timer); + + return 0; +} + /** * ipath_register_ib_device - register our device with the infiniband core - * @unit: the device number to register * @dd: the device data structure * Return the allocated ipath_ibdev pointer or NULL on error. */ -static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) +int ipath_register_ib_device(struct ipath_devdata *dd) { - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; int ret; idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); - if (idev == NULL) + if (idev == NULL) { + ret = -ENOMEM; goto bail; + } dev = &idev->ibdev; /* Only need to initialize non-zero fields. */ + spin_lock_init(&idev->n_pds_lock); + spin_lock_init(&idev->n_ahs_lock); + spin_lock_init(&idev->n_cqs_lock); + spin_lock_init(&idev->n_qps_lock); + spin_lock_init(&idev->n_srqs_lock); + spin_lock_init(&idev->n_mcast_grps_lock); + spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); @@ -1030,7 +1491,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) idev->link_width_enabled = 3; /* 1x or 4x */ /* Snapshot current HW counters to "clear" them. */ - ipath_layer_get_counters(dd, &cntrs); + ipath_get_counters(dd, &cntrs); idev->z_symbol_error_counter = cntrs.symbol_error_counter; idev->z_link_error_recovery_counter = cntrs.link_error_recovery_counter; @@ -1054,14 +1515,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) * device types in the system, we can't be sure this is unique. */ if (!sys_image_guid) - sys_image_guid = ipath_layer_get_guid(dd); + sys_image_guid = dd->ipath_guid; idev->sys_image_guid = sys_image_guid; - idev->ib_unit = unit; + idev->ib_unit = dd->ipath_unit; idev->dd = dd; strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); dev->owner = THIS_MODULE; - dev->node_guid = ipath_layer_get_guid(dd); + dev->node_guid = dd->ipath_guid; dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1093,9 +1554,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->node_type = IB_NODE_CA; + dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; - dev->dma_device = ipath_layer_get_device(dd); + dev->dma_device = &dd->pcidev->dev; dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; @@ -1137,9 +1598,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) dev->attach_mcast = ipath_multicast_attach; dev->detach_mcast = ipath_multicast_detach; dev->process_mad = ipath_process_mad; + dev->mmap = ipath_mmap; snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); + IPATH_IDSTR " %s", system_utsname.nodename); ret = ib_register_device(dev); if (ret) @@ -1148,7 +1610,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) if (ipath_verbs_register_sysfs(dev)) goto err_class; - ipath_layer_enable_timer(dd); + enable_timer(dd); goto bail; @@ -1160,37 +1622,32 @@ err_lk: kfree(idev->qp_table.table); err_qp: ib_dealloc_device(dev); - _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", - unit, -ret); + ipath_dev_err(dd, "cannot register verbs: %d!\n", 
-ret); idev = NULL; bail: - return idev; + dd->verbs_dev = idev; + return ret; } -static void ipath_unregister_ib_device(void *arg) +void ipath_unregister_ib_device(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ib_device *ibdev = &dev->ibdev; - ipath_layer_disable_timer(dev->dd); + disable_timer(dev->dd); ib_unregister_device(ibdev); if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) - _VERBS_ERROR("ipath%d pending list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "pending list not empty!\n"); if (!list_empty(&dev->piowait)) - _VERBS_ERROR("ipath%d piowait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "piowait list not empty!\n"); if (!list_empty(&dev->rnrwait)) - _VERBS_ERROR("ipath%d rnrwait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); if (!ipath_mcast_tree_empty()) - _VERBS_ERROR("ipath%d multicast table memory leak!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "multicast table memory leak!\n"); /* * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! @@ -1201,25 +1658,12 @@ static void ipath_unregister_ib_device(void *arg) ib_dealloc_device(ibdev); } -static int __init ipath_verbs_init(void) -{ - return ipath_verbs_register(ipath_register_ib_device, - ipath_unregister_ib_device, - ipath_ib_piobufavail, ipath_ib_rcv, - ipath_ib_timer); -} - -static void __exit ipath_verbs_cleanup(void) -{ - ipath_verbs_unregister(); -} - static ssize_t show_rev(struct class_device *cdev, char *buf) { struct ipath_ibdev *dev = container_of(cdev, struct ipath_ibdev, ibdev.class_dev); - return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd)); + return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); } static ssize_t show_hca(struct class_device *cdev, char *buf) @@ -1228,7 +1672,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) container_of(cdev, struct ipath_ibdev, ibdev.class_dev); int ret; - ret = ipath_layer_get_boardname(dev->dd, buf, 128); + ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); if (ret < 0) goto bail; strcat(buf, "\n"); @@ -1255,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" + "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" "PKT drops %d\n" @@ -1262,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, + dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; @@ -1305,6 +1750,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) bail: return ret; } - -module_init(ipath_verbs_init); -module_exit(ipath_verbs_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 2df684727dc..8039f6e5f0c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -38,10 +38,10 @@ #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/interrupt.h> +#include <linux/kref.h> #include <rdma/ib_pack.h> #include "ipath_layer.h" -#include "verbs_debug.h" #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / 
PAGE_SIZE / BITS_PER_BYTE) @@ -50,7 +50,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IPATH_UVERBS_ABI_VERSION 1 +#define IPATH_UVERBS_ABI_VERSION 2 /* * Define an ib_cq_notify value that is not valid so we know when CQ @@ -152,19 +152,6 @@ struct ipath_mcast { int n_attached; }; -/* Memory region */ -struct ipath_mr { - struct ib_mr ibmr; - struct ipath_mregion mr; /* must be last */ -}; - -/* Fast memory region */ -struct ipath_fmr { - struct ib_fmr ibfmr; - u8 page_shift; - struct ipath_mregion mr; /* must be last */ -}; - /* Protection domain */ struct ipath_pd { struct ib_pd ibpd; @@ -178,58 +165,90 @@ struct ipath_ah { }; /* - * Quick description of our CQ/QP locking scheme: - * - * We have one global lock that protects dev->cq/qp_table. Each - * struct ipath_cq/qp also has its own lock. An individual qp lock - * may be taken inside of an individual cq lock. Both cqs attached to - * a qp may be locked, with the send cq locked first. No other - * nesting should be done. - * - * Each struct ipath_cq/qp also has an atomic_t ref count. The - * pointer from the cq/qp_table to the struct counts as one reference. - * This reference also is good for access through the consumer API, so - * modifying the CQ/QP etc doesn't need to take another reference. - * Access because of a completion being polled does need a reference. - * - * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the - * destroy function to sleep on. - * - * This means that access from the consumer API requires nothing but - * taking the struct's lock. - * - * Access because of a completion event should go as follows: - * - lock cq/qp_table and look up struct - * - increment ref count in struct - * - drop cq/qp_table lock - * - lock struct, do your thing, and unlock struct - * - decrement ref count; if zero, wake up waiters - * - * To destroy a CQ/QP, we can do the following: - * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock - * - decrement ref count - * - wait_event until ref count is zero - * - * It is the consumer's responsibilty to make sure that no QP - * operations (WQE posting or state modification) are pending when the - * QP is destroyed. Also, the consumer must make sure that calls to - * qp_modify are serialized. - * - * Possible optimizations (wait for profile data to see if/where we - * have locks bouncing between CPUs): - * - split cq/qp table lock into n separate (cache-aligned) locks, - * indexed (say) by the page in the table + * This structure is used by ipath_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. */ +struct ipath_mmap_info { + struct ipath_mmap_info *next; + struct ib_ucontext *context; + void *obj; + struct kref ref; + unsigned size; + unsigned mmap_cnt; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct ipath_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ +}; +/* + * The completion queue structure. + */ struct ipath_cq { struct ib_cq ibcq; struct tasklet_struct comptask; spinlock_t lock; u8 notify; u8 triggered; - u32 head; /* new records added to the head */ - u32 tail; /* poll_cq() reads from here. 
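[Editor's note] Since struct ipath_cq_wc above is a plain head/tail ring that can also be mapped into user space, polling it reduces to comparing the two indices and wrapping at ibcq.cqe (the array really holds cqe + 1 entries, as the comment says). A minimal consumer sketch, illustrative only; the driver's real consumer is ipath_poll_cq(), which is not part of this hunk:

        struct ipath_cq_wc *wc = cq->queue;

        if (wc->tail != wc->head) {
                struct ib_wc entry = wc->queue[wc->tail];

                if (wc->tail >= cq->ibcq.cqe)   /* valid indices run 0..cqe */
                        wc->tail = 0;
                else
                        wc->tail++;
                /* hand 'entry' to the caller */
        }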
*/ - struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */ + struct ipath_cq_wc *queue; + struct ipath_mmap_info *ip; +}; + +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct ipath_seg { + void *vaddr; + size_t length; +}; + +/* The number of ipath_segs that fit in a page. */ +#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) + +struct ipath_segarray { + struct ipath_seg segs[IPATH_SEGSZ]; +}; + +struct ipath_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of ipath_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + struct ipath_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct ipath_sge { + struct ipath_mregion *mr; + void *vaddr; /* current pointer into the segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct ipath_mr { + struct ib_mr ibmr; + struct ipath_mregion mr; /* must be last */ }; /* @@ -248,32 +267,50 @@ struct ipath_swqe { /* * Receive work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->r_max_sge. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). */ struct ipath_rwqe { u64 wr_id; - u32 length; /* total length of data in sg_list */ u8 num_sge; - struct ipath_sge sg_list[0]; + struct ib_sge sg_list[0]; }; -struct ipath_rq { - spinlock_t lock; +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct ipath_rwq { u32 head; /* new work requests posted to the head */ u32 tail; /* receives pull requests from here. */ + struct ipath_rwqe wq[0]; +}; + +struct ipath_rq { + struct ipath_rwq *wq; + spinlock_t lock; u32 size; /* size of RWQE array */ u8 max_sge; - struct ipath_rwqe *wq; /* RWQE array */ }; struct ipath_srq { struct ib_srq ibsrq; struct ipath_rq rq; + struct ipath_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; +struct ipath_sge_state { + struct ipath_sge *sg_list; /* next SGE to be used if any */ + struct ipath_sge sge; /* progress state for the current SGE */ + u8 num_sge; +}; + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). 
@@ -293,6 +330,7 @@ struct ipath_qp { atomic_t refcount; wait_queue_head_t wait; struct tasklet_struct s_task; + struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ /* current RDMA read send data */ @@ -327,13 +365,16 @@ struct ipath_qp { u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ + u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ + u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ @@ -345,7 +386,8 @@ struct ipath_qp { u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ - struct ipath_rq r_rq; /* receive work queue */ + struct ipath_rq r_rq; /* receive work queue */ + struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; /* @@ -354,6 +396,8 @@ struct ipath_qp { #define IPATH_S_BUSY 0 #define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_PSN_CREDIT 2048 + /* * Since struct ipath_swqe is not a fixed size, we can't simply index into * struct ipath_qp.s_wq. This function does the array index computation. @@ -369,15 +413,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, /* * Since struct ipath_rwqe is not a fixed size, we can't simply index into - * struct ipath_rq.wq. This function does the array index computation. + * struct ipath_rwq.wq. This function does the array index computation. 
*/ static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, unsigned n) { return (struct ipath_rwqe *) - ((char *) rq->wq + + ((char *) rq->wq->wq + (sizeof(struct ipath_rwqe) + - rq->max_sge * sizeof(struct ipath_sge)) * n); + rq->max_sge * sizeof(struct ib_sge)) * n); } /* @@ -417,6 +461,7 @@ struct ipath_ibdev { struct ib_device ibdev; struct list_head dev_list; struct ipath_devdata *dd; + struct ipath_mmap_info *pending_mmaps; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -435,11 +480,20 @@ struct ipath_ibdev { __be64 sys_image_guid; /* in network order */ __be64 gid_prefix; /* in network order */ __be64 mkey; + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + u64 ipath_sword; /* total dwords sent (sample result) */ u64 ipath_rword; /* total dwords received (sample result) */ u64 ipath_spkts; /* total packets sent (sample result) */ @@ -472,6 +526,7 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; + u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; @@ -494,8 +549,19 @@ struct ipath_ibdev { struct ipath_opcode_stats opstats[128]; }; -struct ipath_ucontext { - struct ib_ucontext ibucontext; +struct ipath_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; }; static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) @@ -503,11 +569,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct ipath_mr, ibmr); } -static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct ipath_fmr, ibfmr); -} - static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) { return container_of(ibpd, struct ipath_pd, ibpd); @@ -545,12 +606,6 @@ int ipath_process_mad(struct ib_device *ibdev, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); -static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct ipath_ucontext, ibucontext); -} - /* * Compare the lower 24 bits of the two values. * Returns an integer <, ==, or > than zero. 
@@ -562,6 +617,13 @@ static inline int ipath_cmp24(u32 a, u32 b) struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs); + int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); @@ -578,8 +640,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, int ipath_destroy_qp(struct ib_qp *ibqp); +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); + int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask); + int attr_mask, struct ib_udata *udata); int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); @@ -592,13 +656,10 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); void ipath_get_credit(struct ipath_qp *qp, u32 aeth); -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc); - -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, - struct ib_sge *sge, int acc); +void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); @@ -624,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc); int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, @@ -638,7 +699,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, struct ib_udata *udata); int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); @@ -680,6 +742,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list); int ipath_dealloc_fmr(struct ib_fmr *ibfmr); +void ipath_release_mmap_info(struct kref *ref); + +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); void ipath_insert_rnr_queue(struct ipath_qp *qp); @@ -700,6 +766,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); +int ipath_register_ib_device(struct ipath_devdata *); + +void ipath_unregister_ib_device(struct ipath_ibdev *); + +void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); + +int ipath_ib_piobufavail(struct ipath_ibdev *); + +void ipath_ib_timer(struct ipath_ibdev *); + +unsigned ipath_get_npkeys(struct ipath_devdata *); + +u32 ipath_get_cr_errpkey(struct ipath_devdata *); + +unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); + extern const enum ib_wc_opcode 
ib_ipath_wc_opcode[]; extern const u8 ipath_cvt_physportstate[]; @@ -714,6 +796,8 @@ extern unsigned int ib_ipath_max_cqs; extern unsigned int ib_ipath_max_qp_wrs; +extern unsigned int ib_ipath_max_qps; + extern unsigned int ib_ipath_max_sges; extern unsigned int ib_ipath_max_mcast_grps; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index ee0e1d96d72..085e28b939e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, goto bail; } + spin_lock(&dev->n_mcast_grps_lock); if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); ret = ENOMEM; goto bail; } dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); + + mcast->n_attached++; list_add_tail_rcu(&mqp->list, &mcast->qp_list); @@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); ipath_mcast_free(mcast); + spin_lock(&dev->n_mcast_grps_lock); dev->n_mcast_grps_allocated--; + spin_unlock(&dev->n_mcast_grps_lock); } ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c new file mode 100644 index 00000000000..0095bb70f34 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include "ipath_kernel.h" + +/** + * ipath_enable_wc - enable write combining for MMIO writes to the device + * @dd: infinipath device + * + * Nothing to do on PowerPC, so just return without error. 
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 00000000000..0095bb70f34
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+	return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int ipath_unordered_wc(void)
+{
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index f8f9e2e8cbd..04696e62da8 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
 			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
 				   "cookie is %d\n", cookie);
 			dd->ipath_wc_cookie = cookie;
+			dd->ipath_wc_base = (unsigned long) pioaddr;
+			dd->ipath_wc_len = (unsigned long) piolen;
 		}
 	}
 
@@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
 void ipath_disable_wc(struct ipath_devdata *dd)
 {
 	if (dd->ipath_wc_cookie) {
+		int r;
 		ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
-		mtrr_del(dd->ipath_wc_cookie, 0, 0);
-		dd->ipath_wc_cookie = 0;
+		r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+			     dd->ipath_wc_len);
+		if (r < 0)
+			dev_info(&dd->pcidev->dev,
+				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+				 dd->ipath_wc_cookie, dd->ipath_wc_base,
+				 dd->ipath_wc_len, r);
+		dd->ipath_wc_cookie = 0; /* even on failure */
 	}
 }
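The ipath_wc_x86_64.c change above records the PIO buffer base and length when the write-combining MTRR is added, so ipath_disable_wc() can hand the same range back to mtrr_del() and log any failure instead of ignoring it. A minimal sketch of that add/del pairing follows; the wc_state structure and the example_* function names are assumptions for illustration, while mtrr_add(), mtrr_del() and MTRR_TYPE_WRCOMB are the usual x86 kernel interfaces.

/*
 * Illustrative sketch: remember the range passed to mtrr_add() so the
 * later mtrr_del() call matches it exactly.
 */
#include <linux/kernel.h>
#include <asm/mtrr.h>

struct wc_state {
	int cookie;		/* MTRR register number from mtrr_add() */
	unsigned long base;
	unsigned long len;
};

static int example_enable_wc(struct wc_state *s, unsigned long pioaddr,
			     unsigned long piolen)
{
	int cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);

	if (cookie < 0)
		return cookie;	/* no free register or bad alignment */

	s->cookie = cookie;
	s->base = pioaddr;	/* kept so mtrr_del() gets the same range */
	s->len = piolen;
	return 0;
}

static void example_disable_wc(struct wc_state *s)
{
	if (s->cookie) {
		if (mtrr_del(s->cookie, s->base, s->len) < 0)
			printk(KERN_INFO "mtrr_del failed\n");
		s->cookie = 0;	/* forget the cookie even on failure */
	}
}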
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676f2a1..00000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING	/* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- *	modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO        0x1	/* generic low verbosity stuff */
-#define __VERBS_DBG         0x2	/* generic debug */
-#define __VERBS_VDBG        0x4	/* verbose debug */
-#define __VERBS_SMADBG      0x8000	/* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
-			printk(KERN_INFO "%s: " fmt,"ib_ipath", \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_DBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_VDBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */