diff options
author | David Woodhouse <dwmw2@infradead.org> | 2008-02-03 18:29:41 +1100 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2008-02-03 18:30:32 +1100 |
commit | c1f3ee120bb61045b1c0a3ead620d1d65af47130 (patch) | |
tree | 908430bf2b47fe8e96ac623ae7ab6dd5698d0938 /drivers/infiniband/hw | |
parent | e619a75ff6201b567a539e787aa9af9bc63a3187 (diff) | |
parent | 9135f1901ee6449dfe338adf6e40e9c2025b8150 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'drivers/infiniband/hw')
48 files changed, 2234 insertions, 886 deletions
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 36b98989b15..7e7b5a66f04 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,5 +1,4 @@ -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \ - -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core +EXTRA_CFLAGS += -Idrivers/net/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index eec6a30840c..03c5ff62889 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -179,7 +179,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) setup.size = 1UL << cq->size_log2; setup.credits = 65535; setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type == T3B) + if (rdev_p->t3cdev_p->type != T3A) setup.ovfl_mode = 0; else setup.ovfl_mode = 1; @@ -584,7 +584,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, { u32 i, nr_wqe, copy_len; u8 *copy_data; - u8 wr_len, utx_len; /* lenght in 8 byte flit */ + u8 wr_len, utx_len; /* length in 8 byte flit */ enum t3_wr_flags flag; __be64 *wqe; u64 utx_cmd; diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index c84d4ac4935..969d4d92845 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -315,7 +315,7 @@ struct t3_rdma_init_wr { __be32 ird; __be64 qp_dma_addr; /* 7 */ __be32 qp_dma_size; /* 8 */ - u32 irs; + __be32 irs; }; struct t3_genbit { @@ -324,7 +324,8 @@ struct t3_genbit { }; enum rdma_init_wr_flags { - RECVS_POSTED = 1, + RECVS_POSTED = (1<<0), + PRIV_QP = (1<<1), }; union t3_wr { diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 20ba372dd18..e9a08fa3dff 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -332,7 +332,7 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, } }; - if (ip_route_output_flow(&rt, &fl, NULL, 0)) + if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) return NULL; return rt; } @@ -1118,7 +1118,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) status2errno(rpl->status)); connect_reply_upcall(ep, status2errno(rpl->status)); state_set(&ep->com, DEAD); - if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status)) + if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); @@ -1249,7 +1249,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, skb_trim(skb, sizeof(struct cpl_tid_release)); skb_get(skb); - if (tdev->type == T3B) + if (tdev->type != T3A) release_tid(tdev, hwtid, skb); else { struct cpl_pass_accept_rpl *rpl; diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index a6c2c4ba29e..73bfd1656f8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -122,6 +122,13 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, *total_size += buffer_list[i].size; if (i > 0) mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; } if (*total_size > 0xFFFFFFFFULL) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f0c77758937..df1838f8f94 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -39,6 +39,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/ethtool.h> +#include <linux/rtnetlink.h> #include <asm/io.h> #include <asm/irq.h> @@ -645,7 +646,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err) goto err; - if (udata && t3b_device(rhp)) { + if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3; PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__, @@ -1000,6 +1001,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; @@ -1052,7 +1054,9 @@ static ssize_t show_fw_ver(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1064,7 +1068,9 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index dd89b6b91f9..ea2cdd73dd8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -208,36 +208,19 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i, err = 0; - u32 pbl_addr[4]; - u8 page_size[4]; + int i; if (wr->num_sge > T3_MAX_SGE) return -EINVAL; - err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } for (; i < T3_MAX_SGE; i++) { wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; } return 0; } @@ -659,6 +642,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking heirarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); @@ -668,6 +652,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -678,7 +663,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) { - if (t3b_device(qhp->rhp)) + if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); else __flush_qp(qhp, flag); @@ -732,6 +717,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, @@ -847,10 +833,11 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, disconnect = 1; ep = qhp->ep; } + flush_qp(qhp, &flag); break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (t3b_device(qhp->rhp)) + if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); if (!internal) terminate = 1; diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 97d108634c5..194c1c30cf6 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -1,7 +1,7 @@ /* * IBM eServer eHCA Infiniband device driver for Linux on POWER * - * adress vector functions + * address vector functions * * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> * Khadija Souissi <souissik@de.ibm.com> @@ -50,6 +50,42 @@ static struct kmem_cache *av_cache; +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int ret; @@ -69,15 +105,13 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.slid_path_bits = ah_attr->src_path_bits; if (ehca_static_rate < 0) { - int ah_mult = ib_rate_to_mult(ah_attr->static_rate); - int ehca_mult = - ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); - - if (ah_mult >= ehca_mult) - av->av.ipd = 0; - else - av->av.ipd = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; } else av->av.ipd = ehca_static_rate; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 2d660ae189e..f281d16040f 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -94,8 +94,11 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_aqp1; - enum ib_rate rate; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialze modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; }; @@ -142,6 +145,14 @@ enum ehca_ext_qp_type { EQPT_SRQ = 3, }; +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + struct ehca_qp { union { struct ib_qp ib_qp; @@ -165,10 +176,18 @@ struct ehca_qp { struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; /* mmap counter for resources mapped into user space */ u32 mm_count_squeue; u32 mm_count_rqueue; u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -323,6 +342,8 @@ extern int ehca_static_rate; extern int ehca_port_act_time; extern int ehca_use_hp_mr; extern int ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 79c25f51c21..0467c158d4a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%li" + "ehca_cq=%p cq_num=%x h_ret=%li " "counter=%i act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 15806d14046..5bd7b591987 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -151,7 +151,6 @@ int ehca_query_port(struct ib_device *ibdev, } memset(props, 0, sizeof(struct ib_port_attr)); - props->state = rblock->state; switch (rblock->max_mtu) { case 0x1: @@ -188,11 +187,20 @@ int ehca_query_port(struct ib_device *ibdev, props->subnet_timeout = rblock->subnet_timeout; props->init_type_reply = rblock->init_type_reply; - props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; - - /* at the moment (logical) link state is always LINK_UP */ - props->phys_state = 0x5; + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + } query_port1: ehca_free_fw_ctrlblock(rblock); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 3f617b27b95..863b34fa9ff 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -62,6 +62,7 @@ #define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) #define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) @@ -354,17 +355,34 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) { u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + int suppress_event; + /* replay modify_qp for sqps */ + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + suppress_event = !sport->ibqp_sqp[IB_QPT_GSI]; + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!suppress_event) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + /* AQP1 was destroyed, ignore this event */ + if (suppress_event) + break; + + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); ehca_query_sma_attr(shca, port, - &shca->sport[port - 1].saved_attr); + &sport->saved_attr); } else { - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); } @@ -378,11 +396,11 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) ehca_warn(&shca->ib_device, "disruptive port " "%d configuration change", port); - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); } else @@ -394,6 +412,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) case 0x33: /* trace stopped */ ehca_err(&shca->ib_device, "Traced stopped."); break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; default: ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", ec, shca->ib_device.name); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index dce503bb7d6..c469bfde270 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -189,6 +189,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ehca_poll_eqs(unsigned long data); +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); @@ -197,4 +200,6 @@ void ehca_free_fw_ctrlblock(void *ptr); #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif +void ehca_recover_sqp(struct ib_qp *sqp); + #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c6cd38c5321..84c9b7b8669 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -43,13 +43,14 @@ #ifdef CONFIG_PPC_64K_PAGES #include <linux/slab.h> #endif + #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0024" +#define HCAD_VERSION "0025" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); @@ -66,6 +67,7 @@ int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; int ehca_scaling_code = 0; int ehca_mr_largepage = 1; +int ehca_lock_hcalls = -1; module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO); module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); @@ -77,6 +79,7 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO); module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -87,7 +90,8 @@ MODULE_PARM_DESC(hw_level, "hardware level" " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); MODULE_PARM_DESC(nr_ports, - "number of connected ports (default: 2)"); + "number of connected ports (-1: autodetect, 1: port one only, " + "2: two ports (default)"); MODULE_PARM_DESC(use_hp_mr, "high performance MRs (0: no (default), 1: yes)"); MODULE_PARM_DESC(port_act_time, @@ -102,6 +106,9 @@ MODULE_PARM_DESC(scaling_code, MODULE_PARM_DESC(mr_largepage, "use large page for MR (0: use PAGE_SIZE (default), " "1: use large page depending on MR size"); +MODULE_PARM_DESC(lock_hcalls, + "serialize all hCalls made by the driver " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -258,6 +265,7 @@ static struct cap_descr { { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, }; static int ehca_sense_attributes(struct ehca_shca *shca) @@ -327,15 +335,18 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - shca->sport[0].rate = IB_RATE_30_GBPS; - shca->sport[1].rate = IB_RATE_30_GBPS; - shca->hca_cap = rblock->hca_cap_indicators; ehca_gen_dbg(" ... HCA capabilities:"); for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC); + /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; if (ehca_mr_largepage) { /* support extra sizes only if enabled */ @@ -501,7 +512,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) } sport->ibcq_aqp1 = ibcq; - if (sport->ibqp_aqp1) { + if (sport->ibqp_sqp[IB_QPT_GSI]) { ehca_err(&shca->ib_device, "AQP1 QP is already created."); ret = -EPERM; goto create_aqp1; @@ -527,7 +538,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) ret = PTR_ERR(ibqp); goto create_aqp1; } - sport->ibqp_aqp1 = ibqp; + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; return 0; @@ -540,7 +551,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport) { int ret; - ret = ib_destroy_qp(sport->ibqp_aqp1); + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); if (ret) { ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); return ret; @@ -580,6 +591,11 @@ static struct attribute_group ehca_drv_attr_grp = { .attrs = ehca_drv_attrs }; +static struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + #define EHCA_RESOURCE_ATTR(name) \ static ssize_t ehca_show_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -678,7 +694,7 @@ static int __devinit ehca_probe(struct of_device *dev, struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret; + int ret, i; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -699,6 +715,8 @@ static int __devinit ehca_probe(struct of_device *dev, return -ENOMEM; } mutex_init(&shca->modify_mutex); + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); shca->ofdev = dev; shca->ipz_hca_handle.handle = *handle; @@ -889,6 +907,9 @@ static struct of_platform_driver ehca_driver = { .match_table = ehca_device_table, .probe = ehca_probe, .remove = ehca_remove, + .driver = { + .groups = ehca_drv_attr_groups, + }, }; void ehca_poll_eqs(unsigned long data) @@ -916,7 +937,7 @@ void ehca_poll_eqs(unsigned long data) ehca_process_eq(shca, 0); } } - mod_timer(&poll_eqs_timer, jiffies + HZ); + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); spin_unlock(&shca_list_lock); } @@ -947,10 +968,6 @@ int __init ehca_module_init(void) goto module_init2; } - ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_gen_err("Cannot create driver attributes ret=%d", ret); - if (ehca_poll_all_eqs != 1) { ehca_gen_err("WARNING!!!"); ehca_gen_err("It is possible to lose interrupts."); @@ -976,7 +993,6 @@ void __exit ehca_module_exit(void) if (ehca_poll_all_eqs == 1) del_timer_sync(&poll_eqs_timer); - sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); ibmebus_unregister_driver(&ehca_driver); ehca_destroy_slab_caches(); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index de182648b28..1012f15a714 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -592,10 +592,8 @@ static struct ehca_qp *internal_create_qp( goto create_qp_exit1; } - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - parms.sigtype = HCALL_SIGT_EVERY; - else - parms.sigtype = HCALL_SIGT_BY_WQE; + /* Always signal by WQE so we can hide circ. WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; @@ -618,6 +616,10 @@ static struct ehca_qp *internal_create_qp( parms.squeue.max_sge = max_send_sge; parms.rqueue.max_sge = max_recv_sge; + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { if (HAS_SQ(my_qp)) ehca_determine_small_queue( @@ -650,6 +652,8 @@ static struct ehca_qp *internal_create_qp( parms.squeue.act_nr_sges = 1; parms.rqueue.act_nr_sges = 1; } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; break; case IB_QPT_UD: case IB_QPT_GSI: @@ -729,12 +733,31 @@ static struct ehca_qp *internal_create_qp( init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; my_qp->init_attr = *init_attr; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit4; + } + } + } + /* NOTE: define_apq0() not supported yet */ if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - goto create_qp_exit4; + goto create_qp_exit5; } } @@ -743,7 +766,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit4; + goto create_qp_exit5; } } @@ -769,12 +792,18 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit4; + goto create_qp_exit6; } } return my_qp; +create_qp_exit6: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit5: + kfree(my_qp->mod_qp_parm); + create_qp_exit4: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); @@ -838,7 +867,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + srq_init_attr->attr.max_sge = 3; /* drive SRQ into RTR state */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -858,7 +887,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT" + ehca_err(pd->device, "Could not modify SRQ to INIT " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -872,7 +901,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ" + ehca_err(pd->device, "Could not enable SRQ " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -886,7 +915,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR" + ehca_err(pd->device, "Could not modify SRQ to RTR " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -992,7 +1021,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, unsigned long flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -1180,6 +1209,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); } if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; if (attr->port_num < 1 || attr->port_num > shca->num_ports) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid port=%x. " @@ -1188,6 +1219,29 @@ static int internal_modify_qp(struct ib_qp *ibqp, shca->num_ports); goto modify_qp_exit2; } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } mqpcb->prim_phys_port = attr->port_num; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); } @@ -1196,10 +1250,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); } if (attr_mask & IB_QP_AV) { - int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> - init_attr.port_num].rate); - mqpcb->dlid = attr->ah_attr.dlid; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); mqpcb->source_path_bits = attr->ah_attr.src_path_bits; @@ -1207,11 +1257,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate = 0; + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* @@ -1247,6 +1298,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; mqpcb->path_mtu = attr->path_mtu; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); } @@ -1280,10 +1333,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); } if (attr_mask & IB_QP_ALT_PATH) { - int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult( - shca->sport[my_qp->init_attr.port_num].rate); - if (attr->alt_port_num < 1 || attr->alt_port_num > shca->num_ports) { ret = -EINVAL; @@ -1309,10 +1358,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ah_mult > 0 && ah_mult < ehca_mult) - mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult; - else - mqpcb->max_static_rate_al = 0; + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } /* OpenIB doesn't support alternate retry counts - copy them */ mqpcb->retry_count_al = mqpcb->retry_count; @@ -1472,6 +1523,8 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); @@ -1484,9 +1537,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, return -EINVAL; } + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + return 0; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + return internal_modify_qp(ibqp, attr, attr_mask, 0); } +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x " + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) @@ -1755,7 +1899,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) } srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = qpcb->actual_nr_sges_in_rq_wqe; + srq_attr->max_sge = 3; srq_attr->srq_limit = EHCA_BMASK_GET( MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); @@ -1774,6 +1918,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; u32 cur_pid = current->tgid; u32 qp_num = my_qp->real_qp_num; int ret; @@ -1820,6 +1965,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, port_num = my_qp->init_attr.port_num; qp_type = my_qp->init_attr.qp_type; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index ea91360835d..3aacc8cf1e4 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -50,6 +50,9 @@ #include "hcp_if.h" #include "hipz_fns.h" +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, struct ib_recv_wr *recv_wr) @@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, if (ehca_debug_level) { ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } return 0; @@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr) + const struct ib_send_wr *send_wr, + int hidden) { u32 idx; u64 dma_length; @@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->wr_flag = 0; - if (send_wr->send_flags & IB_SEND_SIGNALED) + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; if (send_wr->opcode == IB_WR_SEND_WITH_IMM || @@ -199,7 +205,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; wqe_p->local_ee_context_qkey = remote_qkey; - if (!send_wr->wr.ud.ah) { + if (unlikely(!send_wr->wr.ud.ah)) { ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); return -EINVAL; } @@ -255,6 +261,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, } /* eof idx */ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + break; default: @@ -355,13 +370,49 @@ static inline void map_ib_wc_status(u32 cqe_status, *wc_status = IB_WC_SUCCESS; } +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + struct ib_send_wr **bad_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); struct ib_send_wr *cur_send_wr; - struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; unsigned long flags; @@ -369,37 +420,33 @@ int ehca_post_send(struct ib_qp *qp, /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_s, flags); + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + /* loop processes list of send reqs */ for (cur_send_wr = send_wr; cur_send_wr != NULL; cur_send_wr = cur_send_wr->next) { - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); - } - goto post_send_exit0; - } - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); - /* - * if something failed, - * reset the free entry pointer to the start value - */ + ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0); if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); - } + /* if one or more WQEs were successful, don't fail */ + if (wqe_cnt) + ret = 0; goto post_send_exit0; } wqe_cnt++; @@ -410,6 +457,7 @@ int ehca_post_send(struct ib_qp *qp, post_send_exit0: iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index f0792e5fbd0..79e72b25b25 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -40,11 +40,8 @@ */ -#include <linux/module.h> -#include <linux/err.h> #include "ehca_classes.h" #include "ehca_tools.h" -#include "ehca_qes.h" #include "ehca_iverbs.h" #include "hcp_if.h" @@ -93,6 +90,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca, return H_PARAMETER; } + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + for (counter = 0; shca->sport[port - 1].port_state != IB_PORT_ACTIVE && counter < ehca_port_act_time; diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index c16a21374bb..7029aa65375 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -120,26 +120,21 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, unsigned long arg7) { long ret; - int i, sleep_msecs, do_lock; - unsigned long flags; + int i, sleep_msecs; + unsigned long flags = 0; ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - /* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */ - if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) { - arg7 = 0; /* better not upset firmware */ - do_lock = 1; - } - for (i = 0; i < 5; i++) { - if (do_lock) + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) spin_lock_irqsave(&hcall_lock, flags); ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - if (do_lock) + if (ehca_lock_hcalls) spin_unlock_irqrestore(&hcall_lock, flags); if (H_IS_LONG_BUSY(ret)) { @@ -174,24 +169,22 @@ static long ehca_plpar_hcall9(unsigned long opcode, unsigned long arg9) { long ret; - int i, sleep_msecs, do_lock; + int i, sleep_msecs; unsigned long flags = 0; ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); - /* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */ - do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)); - for (i = 0; i < 5; i++) { - if (do_lock) + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) spin_lock_irqsave(&hcall_lock, flags); ret = plpar_hcall9(opcode, outs, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); - if (do_lock) + if (ehca_lock_hcalls) spin_unlock_irqrestore(&hcall_lock, flags); if (H_IS_LONG_BUSY(ret)) { @@ -821,7 +814,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle, /* r4 */ mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 5); + 0, 0, 0, 0, 0); } u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index d9739e55451..bf996c7acc4 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -378,6 +378,7 @@ struct hipz_query_hca { #define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) #define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) #define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) /* query port response block */ struct hipz_query_port { @@ -402,7 +403,11 @@ struct hipz_query_port { u64 max_msg_sz; u32 max_mtu; u32 vl_cap; - u8 reserved2[1900]; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; u64 guid_entries[255]; } __attribute__ ((packed)); diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 851df8a75e7..41462109554 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -82,6 +82,16 @@ #define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ /* + * These 3 values (SDR and DDR may be ORed for auto-speed + * negotiation) are used for the 3rd argument to path_f_set_ib_cfg + * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They + * are also the the possible values for ipath_link_speed_enabled and active + * The values were chosen to match values used within the IB spec. + */ +#define IPATH_IB_SDR 1 +#define IPATH_IB_DDR 2 + +/* * stats maintained by the driver. For now, at least, this is global * to all minor devices. */ @@ -433,8 +443,9 @@ struct ipath_user_info { #define IPATH_CMD_UNUSED_2 26 #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ #define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ +#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ -#define IPATH_CMD_MAX 28 +#define IPATH_CMD_MAX 29 /* * Poll types @@ -477,6 +488,8 @@ struct ipath_cmd { __u64 port_info; /* enable/disable receipt of packets */ __u32 recv_ctrl; + /* enable/disable armlaunch errors (non-zero to enable) */ + __u32 armlaunch_ctrl; /* partition key to set */ __u16 part_key; /* user address of __u32 bitmask of active slaves */ @@ -579,7 +592,7 @@ struct ipath_flash { struct infinipath_counters { __u64 LBIntCnt; __u64 LBFlowStallCnt; - __u64 Reserved1; + __u64 TxSDmaDescCnt; /* was Reserved1 */ __u64 TxUnsupVLErrCnt; __u64 TxDataPktCnt; __u64 TxFlowPktCnt; @@ -615,12 +628,26 @@ struct infinipath_counters { __u64 RxP6HdrEgrOvflCnt; __u64 RxP7HdrEgrOvflCnt; __u64 RxP8HdrEgrOvflCnt; - __u64 Reserved6; - __u64 Reserved7; + __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ + __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ + __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ + __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ __u64 IBStatusChangeCnt; __u64 IBLinkErrRecoveryCnt; __u64 IBLinkDownedCnt; __u64 IBSymbolErrCnt; + /* The following are new for IBA7220 */ + __u64 RxVL15DroppedPktCnt; + __u64 RxOtherLocalPhyErrCnt; + __u64 PcieRetryBufDiagQwordCnt; + __u64 ExcessBufferOvflCnt; + __u64 LocalLinkIntegrityErrCnt; + __u64 RxVlErrCnt; + __u64 RxDlidFltrCnt; }; /* diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 645ed71fd79..a03bd28d9b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -395,16 +395,13 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - /* - * Return the address of the WC as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = (__u64) wc; + __u64 offset = 0; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) - goto bail; + goto bail_free; } spin_lock_irq(&cq->lock); @@ -424,10 +421,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) else n = head - tail; if (unlikely((u32)cqe < n)) { - spin_unlock_irq(&cq->lock); - vfree(wc); - ret = -EOVERFLOW; - goto bail; + ret = -EINVAL; + goto bail_unlock; } for (n = 0; tail != head; n++) { if (cq->ip) @@ -452,6 +447,18 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct ipath_mmap_info *ip = cq->ip; ipath_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, &dev->pending_mmaps); @@ -459,7 +466,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) } ret = 0; + goto bail; +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index 19c56e6491e..d6f69532d83 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -55,7 +55,7 @@ #define __IPATH_PKTDBG 0x80 /* print packet data */ /* print process startup (init)/exit messages */ #define __IPATH_PROCDBG 0x100 -/* print mmap/nopage stuff, not using VDBG any more */ +/* print mmap/fault stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x200 #define __IPATH_ERRPKTDBG 0x400 #define __IPATH_USER_SEND 0x1000 /* use user mode send */ @@ -81,7 +81,7 @@ #define __IPATH_VERBDBG 0x0 /* very verbose debug */ #define __IPATH_PKTDBG 0x0 /* print packet data */ #define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ -/* print mmap/nopage stuff, not using VDBG any more */ +/* print mmap/fault stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x0 #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 1f152ded1e3..d5ff6ca2db3 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -121,6 +121,9 @@ static struct pci_driver ipath_driver = { .probe = ipath_init_one, .remove = __devexit_p(ipath_remove_one), .id_table = ipath_pci_tbl, + .driver = { + .groups = ipath_driver_attr_groups, + }, }; static void ipath_check_status(struct work_struct *work) @@ -331,6 +334,8 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) udelay(1); } + ipath_disable_armlaunch(dd); + writeq(0, piobuf); /* length 0, no dwords actually sent */ ipath_flush_wc(); @@ -362,6 +367,7 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) done: /* disarm piobuf, so it's available again */ ipath_disarm_piobufs(dd, pbnum, 1); + ipath_enable_armlaunch(dd); } static int __devinit ipath_init_one(struct pci_dev *pdev, @@ -800,31 +806,37 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, unsigned cnt) { unsigned i, last = first + cnt; - u64 sendctrl, sendorig; + unsigned long flags; ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - sendorig = dd->ipath_sendctrl; for (i = first; i < last; i++) { - sendctrl = sendorig | INFINIPATH_S_DISARM | - (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * The disarm-related bits are write-only, so it + * is ok to OR them in with our copy of sendctrl + * while we hold the lock. + */ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - sendctrl); + dd->ipath_sendctrl | INFINIPATH_S_DISARM | + (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); + /* can't disarm bufs back-to-back per iba7220 spec */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /* - * Write it again with current value, in case ipath_sendctrl changed - * while we were looping; no critical bits that would require - * locking. - * - * disable PIOAVAILUPD, then re-enable, reading scratch in + * Disable PIOAVAILUPD, then re-enable, reading scratch in * between. This seems to avoid a chip timing race that causes - * pioavail updates to memory to stop. + * pioavail updates to memory to stop. We xor as we don't + * know the state of the bit when we're called. */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - sendorig & ~INFINIPATH_S_PIOBUFAVAILUPD); - sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /** @@ -1000,12 +1012,10 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len) * ipath_get_egrbuf - get an eager buffer * @dd: the infinipath device * @bufnum: the eager buffer to get - * @err: unused * * must only be called if ipath_pd[port] is known to be allocated */ -static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, - int err) +static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) { return dd->ipath_port0_skbinfo ? (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; @@ -1097,13 +1107,14 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd, /* * ipath_kreceive - receive a packet - * @dd: the infinipath device + * @pd: the infinipath port * * called from interrupt handler for errors or receive interrupt */ -void ipath_kreceive(struct ipath_devdata *dd) +void ipath_kreceive(struct ipath_portdata *pd) { u64 *rc; + struct ipath_devdata *dd = pd->port_dd; void *ebuf; const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ @@ -1118,8 +1129,8 @@ void ipath_kreceive(struct ipath_devdata *dd) goto bail; } - l = dd->ipath_port0head; - hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); + l = pd->port_head; + hdrqtail = ipath_get_rcvhdrtail(pd); if (l == hdrqtail) goto bail; @@ -1128,7 +1139,7 @@ reloop: u32 qp; u8 *bthbytes; - rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2)); + rc = (u64 *) (pd->port_rcvhdrq + (l << 2)); hdr = (struct ipath_message_header *)&rc[1]; /* * could make a network order version of IPATH_KD_QP, and @@ -1153,7 +1164,7 @@ reloop: etail = ipath_hdrget_index((__le32 *) rc); if (tlen > sizeof(*hdr) || etype == RCVHQ_RCV_TYPE_NON_KD) - ebuf = ipath_get_egrbuf(dd, etail, 0); + ebuf = ipath_get_egrbuf(dd, etail); } /* @@ -1188,7 +1199,7 @@ reloop: be32_to_cpu(hdr->bth[0]) & 0xff); else { /* - * error packet, type of error unknown. + * error packet, type of error unknown. * Probably type 3, but we don't know, so don't * even try to print the opcode, etc. */ @@ -1238,7 +1249,7 @@ reloop: * earlier packets, we "almost" guarantee we have covered * that case. */ - u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); + u32 hqtail = ipath_get_rcvhdrtail(pd); if (hqtail != hdrqtail) { hdrqtail = hqtail; reloop = 1; /* loop 1 extra time at most */ @@ -1248,7 +1259,7 @@ reloop: pkttot += i; - dd->ipath_port0head = l; + pd->port_head = l; if (pkttot > ipath_stats.sps_maxpkts_call) ipath_stats.sps_maxpkts_call = pkttot; @@ -1332,14 +1343,9 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) /* * Chip Errata: bug 6641; even and odd qwords>3 are swapped */ - if (i > 3) { - if (i & 1) - piov = le64_to_cpu( - dd->ipath_pioavailregs_dma[i - 1]); - else - piov = le64_to_cpu( - dd->ipath_pioavailregs_dma[i + 1]); - } else + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); + else piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); pchg = _IPATH_ALL_CHECKBITS & ~(dd->ipath_pioavailshadow[i] ^ piov); @@ -1598,7 +1604,8 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, /* clear for security and sanity on each use */ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + if (pd->port_rcvhdrtail_kvaddr) + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); /* * tell chip each time we init it, even if we are re-using previous @@ -1614,77 +1621,6 @@ bail: return ret; } -int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id, - u64 bits_to_wait_for, u64 * valp) -{ - unsigned long timeout; - u64 lastval, val; - int ret; - - lastval = ipath_read_kreg64(dd, reg_id); - /* wait a ridiculously long time */ - timeout = jiffies + msecs_to_jiffies(5); - do { - val = ipath_read_kreg64(dd, reg_id); - /* set so they have something, even on failures. */ - *valp = val; - if ((val & bits_to_wait_for) == bits_to_wait_for) { - ret = 0; - break; - } - if (val != lastval) - ipath_cdbg(VERBOSE, "Changed from %llx to %llx, " - "waiting for %llx bits\n", - (unsigned long long) lastval, - (unsigned long long) val, - (unsigned long long) bits_to_wait_for); - cond_resched(); - if (time_after(jiffies, timeout)) { - ipath_dbg("Didn't get bits %llx in register 0x%x, " - "got %llx\n", - (unsigned long long) bits_to_wait_for, - reg_id, (unsigned long long) *valp); - ret = -ENODEV; - break; - } - } while (1); - - return ret; -} - -/** - * ipath_waitfor_mdio_cmdready - wait for last command to complete - * @dd: the infinipath device - * - * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go - * away indicating the last command has completed. It doesn't return data - */ -int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) -{ - unsigned long timeout; - u64 val; - int ret; - - /* wait a ridiculously long time */ - timeout = jiffies + msecs_to_jiffies(5); - do { - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio); - if (!(val & IPATH_MDIO_CMDVALID)) { - ret = 0; - break; - } - cond_resched(); - if (time_after(jiffies, timeout)) { - ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n", - (unsigned long long) val); - ret = -ENODEV; - break; - } - } while (1); - - return ret; -} - /* * Flush all sends that might be in the ready to send state, as well as any @@ -2053,6 +1989,8 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) */ void ipath_shutdown_device(struct ipath_devdata *dd) { + unsigned long flags; + ipath_dbg("Shutting down the device\n"); dd->ipath_flags |= IPATH_LINKUNK; @@ -2073,9 +2011,13 @@ void ipath_shutdown_device(struct ipath_devdata *dd) * gracefully stop all sends allowing any in progress to trickle out * first. */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = 0; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); /* flush it */ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + /* * enough for anything that's going to trickle out to have actually * done so. @@ -2217,25 +2159,15 @@ static int __init infinipath_init(void) goto bail_unit; } - ret = ipath_driver_create_group(&ipath_driver.driver); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver " - "sysfs entries: error %d\n", -ret); - goto bail_pci; - } - ret = ipath_init_ipathfs(); if (ret < 0) { printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " "ipathfs: error %d\n", -ret); - goto bail_group; + goto bail_pci; } goto bail; -bail_group: - ipath_driver_remove_group(&ipath_driver.driver); - bail_pci: pci_unregister_driver(&ipath_driver); @@ -2250,8 +2182,6 @@ static void __exit infinipath_cleanup(void) { ipath_exit_ipathfs(); - ipath_driver_remove_group(&ipath_driver.driver); - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); pci_unregister_driver(&ipath_driver); @@ -2344,5 +2274,34 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) } return 0; } + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing on + * the 7220, which is count-based, rather than trigger-based. Safe for the + * driver check, since it's at init. Not completely safe when used for + * user-mode checking, since some error checking can be lost, but not + * particularly risky, and only has problematic side-effects in the face of + * very buggy user code. There is no reference counting, but that's also + * fine, given the intended use. + */ +void ipath_enable_armlaunch(struct ipath_devdata *dd) +{ + dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + INFINIPATH_E_SPIOARMLAUNCH); + dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + +void ipath_disable_armlaunch(struct ipath_devdata *dd) +{ + /* so don't re-enable if already set */ + dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; + dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); +} + module_init(infinipath_init); module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index bcfa3ccb555..e28a42f5376 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -510,10 +510,10 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -524,10 +524,10 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -538,7 +538,15 @@ static u8 flash_csum(struct ipath_flash *ifp, int adjust) u8 *ip = (u8 *) ifp; u8 csum = 0, len; - for (len = 0; len < ifp->if_length; len++) + /* + * Limit length checksummed to max length of actual data. + * Checksum of erased eeprom will still be bad, but we avoid + * reading past the end of the buffer we were passed. + */ + len = ifp->if_length; + if (len > sizeof(struct ipath_flash)) + len = sizeof(struct ipath_flash); + while (len--) csum += *ip++; csum -= ifp->if_csum; csum = ~csum; @@ -566,7 +574,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) struct ipath_devdata *dd0 = ipath_lookup(0); if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { - u8 *bguid, oguid; + u8 oguid; dd->ipath_guid = dd0->ipath_guid; bguid = (u8 *) & dd->ipath_guid; @@ -608,9 +616,9 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - down(&dd->ipath_eep_sem); + mutex_lock(&dd->ipath_eep_lock); eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); @@ -666,7 +674,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) * elsewhere for backward-compatibility. */ char *snp = dd->ipath_serial; - int len; memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); snp[sizeof ifp->if_sprefix] = '\0'; len = strlen(snp); @@ -756,14 +763,14 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) /* Grab semaphore and read current EEPROM. If we get an * error, let go, but if not, keep it until we finish write. */ - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (ret) { ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); goto free_bail; } ret = ipath_eeprom_internal_read(dd, 0, buf, len); if (ret) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "Unable read EEPROM for logging\n"); goto free_bail; } @@ -771,7 +778,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", csum, ifp->if_csum); ret = 1; @@ -841,7 +848,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 1); ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); } - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (ret) ipath_dev_err(dd, "Failed updating EEPROM\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 5de3243a47c..7e025c8e01b 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -169,7 +169,7 @@ static int ipath_get_base_info(struct file *fp, kinfo->spi_piocnt = dd->ipath_pbufsport; kinfo->spi_piobufbase = (u64) pd->port_piobufs; kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; + dd->ipath_ureg_align * pd->port_port; } else if (master) { kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + (dd->ipath_pbufsport % subport_cnt); @@ -186,7 +186,7 @@ static int ipath_get_base_info(struct file *fp, } if (shared) { kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; + dd->ipath_ureg_align * pd->port_port; kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; @@ -742,11 +742,12 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, * updated and correct itself, even in the face of software * bugs. */ - *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + set_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); } else - clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + clear_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -881,7 +882,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) egrcnt = dd->ipath_rcvegrcnt; /* TID number offset for this port */ - egroff = pd->port_port * egrcnt; + egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt; egrsize = dd->ipath_rcvegrbufsize; ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); @@ -1049,11 +1050,6 @@ static int mmap_piobufs(struct vm_area_struct *vma, phys = dd->ipath_physaddr + piobufs; - /* - * Don't mark this as non-cached, or we don't get the - * write combining behavior we want on the PIO buffers! - */ - #if defined(__powerpc__) /* There isn't a generic way to specify writethrough mappings */ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; @@ -1120,33 +1116,24 @@ bail: } /* - * ipath_file_vma_nopage - handle a VMA page fault. + * ipath_file_vma_fault - handle a VMA page fault. */ -static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static int ipath_file_vma_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) { - unsigned long offset = address - vma->vm_start; - struct page *page = NOPAGE_SIGBUS; - void *pageptr; + struct page *page; - /* - * Convert the vmalloc address into a struct page. - */ - pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); - page = vmalloc_to_page(pageptr); + page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); if (!page) - goto out; - - /* Increment the reference count. */ + return VM_FAULT_SIGBUS; get_page(page); - if (type) - *type = VM_FAULT_MINOR; -out: - return page; + vmf->page = page; + + return 0; } static struct vm_operations_struct ipath_file_vm_ops = { - .nopage = ipath_file_vma_nopage, + .fault = ipath_file_vma_fault, }; static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, @@ -1284,7 +1271,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) goto bail; } - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ piocnt = dd->ipath_pbufsport; @@ -1400,7 +1387,10 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd, pollflag = ipath_poll_hdrqfull(pd); head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; + if (pd->port_rcvhdrtail_kvaddr) + tail = ipath_get_rcvhdrtail(pd); + else + tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); if (head != tail) pollflag |= POLLIN | POLLRDNORM; @@ -1410,7 +1400,7 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd, /* flush waiting flag so we don't miss an event */ wmb(); - set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + set_bit(pd->port_port + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, @@ -1790,6 +1780,7 @@ static int find_shared_port(struct file *fp, } port_fp(fp) = pd; subport_fp(fp) = pd->port_cnt++; + pd->port_subpid[subport_fp(fp)] = current->pid; tidcursor_fp(fp) = 0; pd->active_slaves |= 1 << subport_fp(fp); ipath_cdbg(PROC, @@ -1920,8 +1911,7 @@ static int ipath_do_user_init(struct file *fp, */ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; + pd->port_lastrcvhdrqtail = -1; ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", pd->port_port, head32); pd->port_tidcursor = 0; /* start at beginning after open */ @@ -1941,11 +1931,13 @@ static int ipath_do_user_init(struct file *fp, * We explictly set the in-memory copy to 0 beforehand, so we don't * have to wait to be sure the DMA update has happened. */ - *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + if (pd->port_rcvhdrtail_kvaddr) + ipath_clear_rcvhdrtail(pd); + set_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); + dd->ipath_rcvctrl & + ~(1ULL << dd->ipath_r_tailupd_shift)); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); /* Notify any waiting slaves */ @@ -2022,6 +2014,7 @@ static int ipath_close(struct inode *in, struct file *fp) * the slave(s) don't wait for receive data forever. */ pd->active_slaves &= ~(1 << fd->subport); + pd->port_subpid[fd->subport] = 0; mutex_unlock(&ipath_mutex); goto bail; } @@ -2054,9 +2047,9 @@ static int ipath_close(struct inode *in, struct file *fp) if (dd->ipath_kregbase) { int i; /* atomically clear receive enable port and intr avail. */ - clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, + clear_bit(dd->ipath_r_portenable_shift + port, &dd->ipath_rcvctrl); - clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + clear_bit(pd->port_port + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -2149,11 +2142,15 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, static int ipath_force_pio_avail_update(struct ipath_devdata *dd) { - u64 reg = dd->ipath_sendctrl; + unsigned long flags; - clear_bit(IPATH_S_PIOBUFAVAILUPD, ®); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); return 0; } @@ -2227,6 +2224,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.poll_type; src = &ucmd->cmd.poll_type; break; + case IPATH_CMD_ARMLAUNCH_CTRL: + copy = sizeof(cmd.cmd.armlaunch_ctrl); + dest = &cmd.cmd.armlaunch_ctrl; + src = &ucmd->cmd.armlaunch_ctrl; + break; default: ret = -EINVAL; goto bail; @@ -2302,6 +2304,12 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_POLL_TYPE: pd->poll_type = cmd.cmd.poll_type; break; + case IPATH_CMD_ARMLAUNCH_CTRL: + if (cmd.cmd.armlaunch_ctrl) + ipath_enable_armlaunch(pd->port_dd); + else + ipath_disable_armlaunch(pd->port_dd); + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 262c25db05c..23faba9d21e 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -108,21 +108,16 @@ static const struct file_operations atomic_stats_ops = { .read = atomic_stats_read, }; -#define NUM_COUNTERS sizeof(struct infinipath_counters) / sizeof(u64) - static ssize_t atomic_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - u64 counters[NUM_COUNTERS]; - u16 i; + struct infinipath_counters counters; struct ipath_devdata *dd; dd = file->f_path.dentry->d_inode->i_private; + dd->ipath_f_read_counters(dd, &counters); - for (i = 0; i < NUM_COUNTERS; i++) - counters[i] = ipath_snap_cntr(dd, i); - - return simple_read_from_buffer(buf, count, ppos, counters, + return simple_read_from_buffer(buf, count, ppos, &counters, sizeof counters); } @@ -243,8 +238,7 @@ static int create_device_files(struct super_block *sb, snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, - (struct file_operations *) &simple_dir_operations, - dd); + &simple_dir_operations, dd); if (ret) { printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index ddbebe4bdb2..9e2ced3cdc5 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -148,10 +148,57 @@ struct _infinipath_do_not_use_kernel_regs { unsigned long long ReservedSW2[4]; }; -#define IPATH_KREG_OFFSET(field) (offsetof(struct \ - _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) +struct _infinipath_do_not_use_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 Reserved1; + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 Reserved6; + __u64 Reserved7; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; +}; + +#define IPATH_KREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) #define IPATH_CREG_OFFSET(field) (offsetof( \ - struct infinipath_counters, field) / sizeof(u64)) + struct _infinipath_do_not_use_counters, field) / sizeof(u64)) static const struct ipath_kregs ipath_ht_kregs = { .kr_control = IPATH_KREG_OFFSET(Control), @@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL #define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL +#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf +#define IBA6110_IBCS_LINKSTATE_SHIFT 4 + /* kr_extstatus bits */ #define INFINIPATH_EXTS_FREQSEL 0x2 #define INFINIPATH_EXTS_SERDESSEL 0x4 @@ -296,6 +346,12 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL #define INFINIPATH_RT_BUFSIZE_SHIFT 48 +#define INFINIPATH_R_INTRAVAIL_SHIFT 16 +#define INFINIPATH_R_TAILUPD_SHIFT 31 + +/* kr_xgxsconfig bits */ +#define INFINIPATH_XGXS_RESET 0x7ULL + /* * masks and bits that are different in different chips, or present only * in one @@ -652,7 +708,6 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, "with ID %u\n", boardrev); snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", boardrev); - ret = 1; break; } if (n) @@ -686,6 +741,13 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, dd->ipath_htspeed); ret = 0; + /* + * set here, not in ipath_init_*_funcs because we have to do + * it after we can read chip registers. + */ + dd->ipath_ureg_align = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + bail: return ret; } @@ -969,7 +1031,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, do { u8 cap_type; - /* the HT capability type byte is 3 bytes after the + /* + * The HT capability type byte is 3 bytes after the * capability byte. */ if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { @@ -982,6 +1045,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, } while ((pos = pci_find_next_capability(pdev, pos, PCI_CAP_ID_HT))); + dd->ipath_flags |= IPATH_SWAP_PIOBUFS; + bail: return ret; } @@ -1074,11 +1139,55 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, static void ipath_init_ht_variables(struct ipath_devdata *dd) { + /* + * setup the register offsets, since they are different for each + * chip + */ + dd->ipath_kregs = &ipath_ht_kregs; + dd->ipath_cregs = &ipath_ht_cregs; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; dd->ipath_gpio_sda = IPATH_GPIO_SDA; dd->ipath_gpio_scl = IPATH_GPIO_SCL; + /* + * Fill in data for field-values that change in newer chips. + * We dynamically specify only the mask for LINKTRAININGSTATE + * and only the shift for LINKSTATE, as they are the only ones + * that change. Also precalculate the 3 link states of interest + * and the combined mask. + */ + dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT; + dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK; + dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << + dd->ibcs_ls_shift) | dd->ibcs_lts_mask; + dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); + dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); + dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); + + /* + * Fill in data for ibcc field-values that change in newer chips. + * We dynamically specify only the mask for LINKINITCMD + * and only the shift for LINKCMD and MAXPKTLEN, as they are + * the only ones that change. + */ + dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; + dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; + dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + + /* Fill in shifts for RcvCtrl. */ + dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; + dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; + dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; + dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */ + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << @@ -1135,6 +1244,8 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; + dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; /* * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. @@ -1148,9 +1259,17 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) INFINIPATH_HWE_RXEMEMPARITYERR_MASK << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; - dd->ipath_eep_st_masks[2].errs_to_log = - INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; + dd->delay_mult = 2; /* SDR, 4X, can't change */ + + dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + dd->ipath_link_speed_supported = IPATH_IB_SDR; + dd->ipath_link_width_enabled = IB_WIDTH_4X; + dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; + /* these can't change for this chip, so set once */ + dd->ipath_link_width_active = dd->ipath_link_width_enabled; + dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; } /** @@ -1205,14 +1324,16 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) val &= ~INFINIPATH_HWE_HTCMISCERR4; /* - * PLL ignored because MDIO interface has a logic problem - * for reads, on Comstock and Ponderosa. BRINGUP + * PLL ignored because unused MDIO interface has a logic problem */ if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) val &= ~INFINIPATH_HWE_SERDESPLLFAILED; dd->ipath_hwerrmask = val; } + + + /** * ipath_ht_bringup_serdes - bring up the serdes * @dd: the infinipath device @@ -1284,16 +1405,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) } val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & - INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { - val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK << - INFINIPATH_XGXS_MDIOADDR_SHIFT); - /* - * we use address 3 - */ - val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - change = 1; - } if (val & INFINIPATH_XGXS_RESET) { /* normally true after boot */ val &= ~INFINIPATH_XGXS_RESET; @@ -1329,21 +1440,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - if (!ipath_waitfor_mdio_cmdready(dd)) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio, - ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, - IPATH_MDIO_CTRL_XGXS_REG_8, - 0)); - if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, - IPATH_MDIO_DATAVALID, &val)) - ipath_dbg("Never got MDIO data for XGXS status " - "read\n"); - else - ipath_cdbg(VERBOSE, "MDIO Read reg8, " - "'bank' 31 %x\n", (u32) val); - } else - ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); - return ret; /* for now, say we always succeeded */ } @@ -1396,6 +1492,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, pa |= lenvalid | INFINIPATH_RT_VALID; } } + writeq(pa, tidptr); } @@ -1526,8 +1623,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) } ipath_get_eeprom_info(dd); - if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && - dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { + if (dd->ipath_boardrev == 5) { /* * Later production QHT7040 has same changes as QHT7140, so * can use GPIO interrupts. They have serial #'s starting @@ -1602,6 +1698,210 @@ static void ipath_ht_free_irq(struct ipath_devdata *dd) dd->ipath_intconfig = 0; } +static struct ipath_message_header * +ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) +{ + return (struct ipath_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports) +{ + dd->ipath_portcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_p0_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); +} + +static void ipath_ht_read_counters(struct ipath_devdata *dd, + struct infinipath_counters *cntrs) +{ + cntrs->LBIntCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); + cntrs->LBFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); + cntrs->TxSDmaDescCnt = 0; + cntrs->TxUnsupVLErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); + cntrs->TxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); + cntrs->TxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); + cntrs->TxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); + cntrs->TxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); + cntrs->TxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); + cntrs->TxUnderrunCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); + cntrs->TxFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); + cntrs->TxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); + cntrs->RxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); + cntrs->RxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); + cntrs->RxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); + cntrs->RxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); + cntrs->RxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); + cntrs->RxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); + cntrs->RxICRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); + cntrs->RxVCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); + cntrs->RxFlowCtrlErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); + cntrs->RxBadFormatCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); + cntrs->RxLinkProblemCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); + cntrs->RxEBPCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); + cntrs->RxLPCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); + cntrs->RxBufOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); + cntrs->RxTIDFullErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); + cntrs->RxTIDValidErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); + cntrs->RxPKeyMismatchCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); + cntrs->RxP0HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); + cntrs->RxP1HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); + cntrs->RxP2HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); + cntrs->RxP3HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); + cntrs->RxP4HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); + cntrs->RxP5HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt)); + cntrs->RxP6HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt)); + cntrs->RxP7HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt)); + cntrs->RxP8HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt)); + cntrs->RxP9HdrEgrOvflCnt = 0; + cntrs->RxP10HdrEgrOvflCnt = 0; + cntrs->RxP11HdrEgrOvflCnt = 0; + cntrs->RxP12HdrEgrOvflCnt = 0; + cntrs->RxP13HdrEgrOvflCnt = 0; + cntrs->RxP14HdrEgrOvflCnt = 0; + cntrs->RxP15HdrEgrOvflCnt = 0; + cntrs->RxP16HdrEgrOvflCnt = 0; + cntrs->IBStatusChangeCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); + cntrs->IBLinkErrRecoveryCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); + cntrs->IBLinkDownedCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); + cntrs->IBSymbolErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); + cntrs->RxVL15DroppedPktCnt = 0; + cntrs->RxOtherLocalPhyErrCnt = 0; + cntrs->PcieRetryBufDiagQwordCnt = 0; + cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; + cntrs->LocalLinkIntegrityErrCnt = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->RxVlErrCnt = 0; + cntrs->RxDlidFltrCnt = 0; +} + + +/* no interrupt fallback for these chips */ +static int ipath_ht_nointr_fallback(struct ipath_devdata *dd) +{ + return 0; +} + + +/* + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void ipath_ht_xgxs_reset(struct ipath_devdata *dd) +{ + u64 val, prev_val; + + prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val = prev_val | INFINIPATH_XGXS_RESET; + prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control & ~INFINIPATH_C_LINKENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); +} + + +static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which) +{ + int ret; + + switch (which) { + case IPATH_IB_CFG_LWID: + ret = dd->ipath_link_width_active; + break; + case IPATH_IB_CFG_SPD: + ret = dd->ipath_link_speed_active; + break; + case IPATH_IB_CFG_LWID_ENB: + ret = dd->ipath_link_width_enabled; + break; + case IPATH_IB_CFG_SPD_ENB: + ret = dd->ipath_link_speed_enabled; + break; + default: + ret = -ENOTSUPP; + break; + } + return ret; +} + + +/* we assume range checking is already done, if needed */ +static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) +{ + int ret = 0; + + if (which == IPATH_IB_CFG_LWID_ENB) + dd->ipath_link_width_enabled = val; + else if (which == IPATH_IB_CFG_SPD_ENB) + dd->ipath_link_speed_enabled = val; + else + ret = -ENOTSUPP; + return ret; +} + + +static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b) +{ +} + + +static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) +{ + ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs), + ipath_ib_linktrstate(dd, ibcs)); + return 0; +} + + /** * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1626,22 +1926,19 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd) dd->ipath_f_setextled = ipath_setup_ht_setextled; dd->ipath_f_get_base_info = ipath_ht_get_base_info; dd->ipath_f_free_irq = ipath_ht_free_irq; - - /* - * initialize chip-specific variables - */ dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; + dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; + dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; + dd->ipath_f_config_ports = ipath_ht_config_ports; + dd->ipath_f_read_counters = ipath_ht_read_counters; + dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset; + dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg; + dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg; + dd->ipath_f_config_jint = ipath_ht_config_jint; + dd->ipath_f_ib_updown = ipath_ht_ib_updown; /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_ht_kregs; - dd->ipath_cregs = &ipath_ht_cregs; - - /* - * do very early init that is needed before ipath_f_bus is - * called + * initialize chip-specific variables */ ipath_init_ht_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 0103d6f4847..c7a2f50824c 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -145,10 +145,57 @@ struct _infinipath_do_not_use_kernel_regs { unsigned long long Reserved12; }; -#define IPATH_KREG_OFFSET(field) (offsetof(struct \ - _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) +struct _infinipath_do_not_use_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 Reserved1; + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 Reserved6; + __u64 Reserved7; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; +}; + +#define IPATH_KREG_OFFSET(field) (offsetof( \ + struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) #define IPATH_CREG_OFFSET(field) (offsetof( \ - struct infinipath_counters, field) / sizeof(u64)) + struct _infinipath_do_not_use_counters, field) / sizeof(u64)) static const struct ipath_kregs ipath_pe_kregs = { .kr_control = IPATH_KREG_OFFSET(Control), @@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_pe_cregs = { #define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL #define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL +#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf +#define IBA6120_IBCS_LINKSTATE_SHIFT 4 + /* kr_extstatus bits */ #define INFINIPATH_EXTS_FREQSEL 0x2 #define INFINIPATH_EXTS_SERDESSEL 0x4 @@ -296,6 +346,9 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +#define INFINIPATH_R_INTRAVAIL_SHIFT 16 +#define INFINIPATH_R_TAILUPD_SHIFT 31 + /* 6120 specific hardware errors... */ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), @@ -320,10 +373,28 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) -static int ipath_pe_txe_recover(struct ipath_devdata *); static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *, u32, unsigned long); +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip bug can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. + */ +static void ipath_pe_txe_recover(struct ipath_devdata *dd) +{ + if (ipath_unordered_wc()) + ipath_dbg("Recovering from TXE PIO parity error\n"); + else { + ++ipath_stats.sps_txeparity; + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + } +} + + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -403,35 +474,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) + if (hwerrs & TXE_PIO_PARITY) { + ipath_pe_txe_recover(dd); hwerrs &= ~TXE_PIO_PARITY; - if (hwerrs) { - /* - * if any set that we aren't ignoring only make the - * complaint once, in case it's stuck or recurring, - * and we get here multiple times - * Force link down, so switch knows, and - * LEDs are turned off - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_pe_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - /* - * Mark as having had an error for driver, and also - * for /sys and status word mapped to user programs. - * This marks unit as not usable, until reset - */ - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - dd->ipath_flags &= ~IPATH_INITTED; - } else { + } + if (!hwerrs) { static u32 freeze_cnt; freeze_cnt++; @@ -485,7 +532,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { /* - * If it occurs, it is left masked since the eternal + * If it occurs, it is left masked since the external * interface is unused */ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; @@ -563,6 +610,14 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, dd->ipath_f_put_tid = ipath_pe_put_tid_2; } + + /* + * set here, not in ipath_init_*_funcs because we have to do + * it after we can read chip registers. + */ + dd->ipath_ureg_align = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); + return ret; } @@ -667,17 +722,8 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); prev_val = val; - if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & - INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { - val &= - ~(INFINIPATH_XGXS_MDIOADDR_MASK << - INFINIPATH_XGXS_MDIOADDR_SHIFT); - /* MDIO address 3 */ - val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - } - if (val & INFINIPATH_XGXS_RESET) { + if (val & INFINIPATH_XGXS_RESET) val &= ~INFINIPATH_XGXS_RESET; - } if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { /* need to compensate for Tx inversion in partner */ @@ -707,21 +753,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - if (!ipath_waitfor_mdio_cmdready(dd)) { - ipath_write_kreg( - dd, dd->ipath_kregs->kr_mdio, - ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, - IPATH_MDIO_CTRL_XGXS_REG_8, 0)); - if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, - IPATH_MDIO_DATAVALID, &val)) - ipath_dbg("Never got MDIO data for XGXS " - "status read\n"); - else - ipath_cdbg(VERBOSE, "MDIO Read reg8, " - "'bank' 31 %x\n", (u32) val); - } else - ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); - return ret; } @@ -902,12 +933,27 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, else ipath_dev_err(dd, "Can't find PCI Express " "capability!\n"); + + dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + dd->ipath_link_speed_supported = IPATH_IB_SDR; + dd->ipath_link_width_enabled = IB_WIDTH_4X; + dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; + /* these can't change for this chip, so set once */ + dd->ipath_link_width_active = dd->ipath_link_width_enabled; + dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; return 0; } static void ipath_init_pe_variables(struct ipath_devdata *dd) { /* + * setup the register offsets, since they are different for each + * chip + */ + dd->ipath_kregs = &ipath_pe_kregs; + dd->ipath_cregs = &ipath_pe_cregs; + + /* * bits for selecting i2c direction and values, * used for I2C serial flash */ @@ -916,6 +962,43 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_gpio_sda = IPATH_GPIO_SDA; dd->ipath_gpio_scl = IPATH_GPIO_SCL; + /* + * Fill in data for field-values that change in newer chips. + * We dynamically specify only the mask for LINKTRAININGSTATE + * and only the shift for LINKSTATE, as they are the only ones + * that change. Also precalculate the 3 link states of interest + * and the combined mask. + */ + dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT; + dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK; + dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << + dd->ibcs_ls_shift) | dd->ibcs_lts_mask; + dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); + dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); + dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << + INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | + (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); + + /* + * Fill in data for ibcc field-values that change in newer chips. + * We dynamically specify only the mask for LINKINITCMD + * and only the shift for LINKCMD and MAXPKTLEN, as they are + * the only ones that change. + */ + dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; + dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; + dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + + /* Fill in shifts for RcvCtrl. */ + dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; + dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; + dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; + dd->ipath_r_portcfg_shift = 0; /* Not on IBA6120 */ + /* variables for sanity checking interrupt and errors */ dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << @@ -963,6 +1046,8 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; + dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; /* * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. @@ -984,6 +1069,7 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + dd->delay_mult = 2; /* SDR, 4X, can't change */ } /* setup the MSI stuff again after a reset. I'd like to just call @@ -1289,6 +1375,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) */ dd->ipath_rcvhdrentsize = 24; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; + dd->ipath_rhf_offset = 0; + dd->ipath_egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase); /* * To truly support a 4KB MTU (for usermode), we need to @@ -1359,34 +1448,204 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) dd->ipath_irq = 0; } + +static struct ipath_message_header * +ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) +{ + return (struct ipath_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports) +{ + dd->ipath_portcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_p0_rcvegrcnt = + ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); +} + +static void ipath_pe_read_counters(struct ipath_devdata *dd, + struct infinipath_counters *cntrs) +{ + cntrs->LBIntCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); + cntrs->LBFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); + cntrs->TxSDmaDescCnt = 0; + cntrs->TxUnsupVLErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); + cntrs->TxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); + cntrs->TxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); + cntrs->TxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); + cntrs->TxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); + cntrs->TxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); + cntrs->TxUnderrunCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); + cntrs->TxFlowStallCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); + cntrs->TxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); + cntrs->RxDroppedPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); + cntrs->RxDataPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); + cntrs->RxFlowPktCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); + cntrs->RxDwordCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); + cntrs->RxLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); + cntrs->RxMaxMinLenErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); + cntrs->RxICRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); + cntrs->RxVCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); + cntrs->RxFlowCtrlErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); + cntrs->RxBadFormatCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); + cntrs->RxLinkProblemCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); + cntrs->RxEBPCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); + cntrs->RxLPCRCErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); + cntrs->RxBufOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); + cntrs->RxTIDFullErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); + cntrs->RxTIDValidErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); + cntrs->RxPKeyMismatchCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); + cntrs->RxP0HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); + cntrs->RxP1HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); + cntrs->RxP2HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); + cntrs->RxP3HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); + cntrs->RxP4HdrEgrOvflCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); + cntrs->RxP5HdrEgrOvflCnt = 0; + cntrs->RxP6HdrEgrOvflCnt = 0; + cntrs->RxP7HdrEgrOvflCnt = 0; + cntrs->RxP8HdrEgrOvflCnt = 0; + cntrs->RxP9HdrEgrOvflCnt = 0; + cntrs->RxP10HdrEgrOvflCnt = 0; + cntrs->RxP11HdrEgrOvflCnt = 0; + cntrs->RxP12HdrEgrOvflCnt = 0; + cntrs->RxP13HdrEgrOvflCnt = 0; + cntrs->RxP14HdrEgrOvflCnt = 0; + cntrs->RxP15HdrEgrOvflCnt = 0; + cntrs->RxP16HdrEgrOvflCnt = 0; + cntrs->IBStatusChangeCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); + cntrs->IBLinkErrRecoveryCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); + cntrs->IBLinkDownedCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); + cntrs->IBSymbolErrCnt = + ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); + cntrs->RxVL15DroppedPktCnt = 0; + cntrs->RxOtherLocalPhyErrCnt = 0; + cntrs->PcieRetryBufDiagQwordCnt = 0; + cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; + cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs; + cntrs->RxVlErrCnt = 0; + cntrs->RxDlidFltrCnt = 0; +} + + +/* no interrupt fallback for these chips */ +static int ipath_pe_nointr_fallback(struct ipath_devdata *dd) +{ + return 0; +} + + /* - * On platforms using this chip, and not having ordered WC stores, we - * can get TXE parity errors due to speculative reads to the PIO buffers, - * and this, due to a chip bug can result in (many) false parity error - * reports. So it's a debug print on those, and an info print on systems - * where the speculative reads don't occur. - * Because we can get lots of false errors, we have no upper limit - * on recovery attempts on those platforms. + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. */ -static int ipath_pe_txe_recover(struct ipath_devdata *dd) +static void ipath_pe_xgxs_reset(struct ipath_devdata *dd) { - if (ipath_unordered_wc()) - ipath_dbg("Recovering from TXE PIO parity error\n"); - else { - int cnt = ++ipath_stats.sps_txeparity; - if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { - if (cnt == IPATH_MAX_PARITY_ATTEMPTS) - ipath_dev_err(dd, - "Too many attempts to recover from " - "TXE parity, giving up\n"); - return 0; - } - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); + u64 val, prev_val; + + prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val = prev_val | INFINIPATH_XGXS_RESET; + prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control & ~INFINIPATH_C_LINKENABLE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); +} + + +static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which) +{ + int ret; + + switch (which) { + case IPATH_IB_CFG_LWID: + ret = dd->ipath_link_width_active; + break; + case IPATH_IB_CFG_SPD: + ret = dd->ipath_link_speed_active; + break; + case IPATH_IB_CFG_LWID_ENB: + ret = dd->ipath_link_width_enabled; + break; + case IPATH_IB_CFG_SPD_ENB: + ret = dd->ipath_link_speed_enabled; + break; + default: + ret = -ENOTSUPP; + break; } - return 1; + return ret; +} + + +/* we assume range checking is already done, if needed */ +static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) +{ + int ret = 0; + + if (which == IPATH_IB_CFG_LWID_ENB) + dd->ipath_link_width_enabled = val; + else if (which == IPATH_IB_CFG_SPD_ENB) + dd->ipath_link_speed_enabled = val; + else + ret = -ENOTSUPP; + return ret; } +static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b) +{ +} + + +static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) +{ + ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs), + ipath_ib_linktrstate(dd, ibcs)); + return 0; +} + + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1407,7 +1666,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; dd->ipath_f_clear_tids = ipath_pe_clear_tids; /* - * this may get changed after we read the chip revision, + * _f_put_tid may get changed after we read the chip revision, * but we start with the safe version for all revs */ dd->ipath_f_put_tid = ipath_pe_put_tid; @@ -1415,17 +1674,19 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; dd->ipath_f_free_irq = ipath_pe_free_irq; - - /* initialize chip-specific variables */ dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; + dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback; + dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset; + dd->ipath_f_get_msgheader = ipath_pe_get_msgheader; + dd->ipath_f_config_ports = ipath_pe_config_ports; + dd->ipath_f_read_counters = ipath_pe_read_counters; + dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg; + dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg; + dd->ipath_f_config_jint = ipath_pe_config_jint; + dd->ipath_f_ib_updown = ipath_pe_ib_updown; - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_pe_kregs; - dd->ipath_cregs = &ipath_pe_cregs; + /* initialize chip-specific variables */ ipath_init_pe_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 9dd0bacf846..4471674975c 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -91,7 +91,7 @@ static int create_port0_egr(struct ipath_devdata *dd) struct ipath_skbinfo *skbinfo; int ret; - egrcnt = dd->ipath_rcvegrcnt; + egrcnt = dd->ipath_p0_rcvegrcnt; skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); if (skbinfo == NULL) { @@ -244,8 +244,7 @@ static int init_chip_first(struct ipath_devdata *dd, * cfgports. We do still check and report a difference, if * not same (should be impossible). */ - dd->ipath_portcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); + dd->ipath_f_config_ports(dd, ipath_cfgports); if (!ipath_cfgports) dd->ipath_cfgports = dd->ipath_portcnt; else if (ipath_cfgports <= dd->ipath_portcnt) { @@ -272,22 +271,7 @@ static int init_chip_first(struct ipath_devdata *dd, goto done; } - dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads) - * dd->ipath_cfgports, - GFP_KERNEL); - dd->ipath_lastrcvhdrqtails = - kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails) - * dd->ipath_cfgports, GFP_KERNEL); - - if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) { - ipath_dev_err(dd, "Unable to allocate head arrays, " - "failing\n"); - ret = -ENOMEM; - goto done; - } - pd = create_portdata0(dd); - if (!pd) { ipath_dev_err(dd, "Unable to allocate portdata for port " "0, failing\n"); @@ -345,10 +329,10 @@ static int init_chip_first(struct ipath_devdata *dd, dd->ipath_piobcnt2k, dd->ipath_pio2kbase); spin_lock_init(&dd->ipath_tid_lock); - + spin_lock_init(&dd->ipath_sendctrl_lock); spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); - sema_init(&dd->ipath_eep_sem, 1); + mutex_init(&dd->ipath_eep_lock); done: *pdp = pd; @@ -372,9 +356,9 @@ static int init_chip_reset(struct ipath_devdata *dd, *pdp = dd->ipath_pd[0]; /* ensure chip does no sends or receives while we re-initialize */ dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); if (dd->ipath_portcnt != rtmp) @@ -487,6 +471,7 @@ static void enable_chip(struct ipath_devdata *dd, struct ipath_portdata *pd, int reinit) { u32 val; + unsigned long flags; int i; if (!reinit) @@ -495,19 +480,21 @@ static void enable_chip(struct ipath_devdata *dd, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); /* Enable PIO send, and update of PIOavail regs to memory. */ dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | INFINIPATH_S_PIOBUFAVAILUPD; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * enable port 0 receive, and receive interrupt. other ports * done as user opens and inits them. */ - dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD | - (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) | - (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT); + dd->ipath_rcvctrl = (1ULL << dd->ipath_r_tailupd_shift) | + (1ULL << dd->ipath_r_portenable_shift) | + (1ULL << dd->ipath_r_intravail_shift); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -523,12 +510,11 @@ static void enable_chip(struct ipath_devdata *dd, */ val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); - dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0); /* Initialize so we interrupt on next packet received */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | - dd->ipath_port0head, 0); + dd->ipath_pd[0]->port_head, 0); /* * by now pioavail updates to memory should have occurred, so @@ -542,12 +528,8 @@ static void enable_chip(struct ipath_devdata *dd, /* * Chip Errata bug 6641; even and odd qwords>3 are swapped. */ - if (i > 3) { - if (i & 1) - val = dd->ipath_pioavailregs_dma[i - 1]; - else - val = dd->ipath_pioavailregs_dma[i + 1]; - } + if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) + val = dd->ipath_pioavailregs_dma[i ^ 1]; else val = dd->ipath_pioavailregs_dma[i]; dd->ipath_pioavailshadow[i] = le64_to_cpu(val); @@ -690,12 +672,13 @@ done: */ int ipath_init_chip(struct ipath_devdata *dd, int reinit) { - int ret = 0, i; + int ret = 0; u32 val32, kpiobufs; u32 piobufs, uports; u64 val; struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ gfp_t gfp_flags = GFP_USER | __GFP_COMP; + unsigned long flags; ret = init_housekeeping(dd, &pd, reinit); if (ret) @@ -746,7 +729,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) kpiobufs = ipath_kpiobufs; if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { - i = (int) piobufs - + int i = (int) piobufs - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 0) i = 0; @@ -827,8 +810,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_PIOENABLE); + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * before error clears, since we expect serdes pll errors during diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 6a5dd5cd773..92e58c92152 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -453,7 +453,7 @@ skip_ibchange: } static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char msg[512]) + unsigned supp_msgs, char *msg, int msgsz) { /* * Print the message unless it's ibc status change only, which @@ -461,9 +461,9 @@ static void handle_supp_msgs(struct ipath_devdata *dd, */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { int iserr; - iserr = ipath_decode_err(msg, sizeof msg, - dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); + iserr = ipath_decode_err(msg, msgsz, + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); if (dd->ipath_lasterror & ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) @@ -492,8 +492,8 @@ static void handle_supp_msgs(struct ipath_devdata *dd, } static unsigned handle_frequent_errors(struct ipath_devdata *dd, - ipath_err_t errs, char msg[512], - int *noprint) + ipath_err_t errs, char *msg, + int msgsz, int *noprint) { unsigned long nc; static unsigned long nextmsg_time; @@ -512,7 +512,7 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd, nextmsg_time = nc + HZ * 3; } else if (supp_msgs) { - handle_supp_msgs(dd, supp_msgs, msg); + handle_supp_msgs(dd, supp_msgs, msg, msgsz); supp_msgs = 0; nmsgs = 0; } @@ -525,14 +525,14 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd, static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { - char msg[512]; + char msg[128]; u64 ignore_this_time = 0; int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; int log_idx; - supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); + supp_msgs = handle_frequent_errors(dd, errs, msg, sizeof msg, &noprint); /* don't report errors that are masked */ errs &= ~dd->ipath_maskederrs; @@ -683,7 +683,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) for (i = 0; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (i == 0) { - hd = dd->ipath_port0head; + hd = pd->port_head; tl = (u32) le64_to_cpu( *dd->ipath_hdrqtailptr); } else if (pd && pd->port_cnt && @@ -693,7 +693,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * except kernel */ tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; - if (tl == dd->ipath_lastrcvhdrqtails[i]) + if (tl == pd->port_lastrcvhdrqtail) continue; hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); @@ -703,7 +703,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) (!hd && tl == dd->ipath_hdrqlast)) { if (i == 0) chkerrpkts = 1; - dd->ipath_lastrcvhdrqtails[i] = tl; + pd->port_lastrcvhdrqtail = tl; pd->port_hdrqfull++; /* flush hdrqfull so that poll() sees it */ wmb(); @@ -712,6 +712,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } } if (errs & INFINIPATH_E_RRCVEGRFULL) { + struct ipath_portdata *pd = dd->ipath_pd[0]; + /* * since this is of less importance and not likely to * happen without also getting hdrfull, only count @@ -719,7 +721,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * vs user) */ ipath_stats.sps_etidfull++; - if (dd->ipath_port0head != + if (pd->port_head != (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) chkerrpkts = 1; } @@ -795,6 +797,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) { int i, im; __le64 val; + unsigned long flags; /* disable error interrupts, to avoid confusion */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); @@ -813,11 +816,14 @@ void ipath_clear_freeze(struct ipath_devdata *dd) dd->ipath_control); /* ensure pio avail updates continue */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); /* * We just enabled pioavailupdate, so dma copy is almost certainly @@ -825,8 +831,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd) */ for (i = 0; i < dd->ipath_pioavregs; i++) { /* deal with 6110 chip bug */ - im = i > 3 ? ((i&1) ? i-1 : i+1) : i; - val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im); + im = i > 3 ? i ^ 1 : i; + val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im); dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] = le64_to_cpu(val); } @@ -849,7 +855,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) /* this is separate to allow for better optimization of ipath_intr() */ -static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) +static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) { /* * sometimes happen during driver init and unload, don't want @@ -877,7 +883,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) dd->ipath_f_free_irq(dd); } } - if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) { + if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { ipath_dev_err(dd, "%u unexpected interrupts, " "disabling interrupts completely\n", *unexpectp); @@ -892,7 +898,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) "ignoring\n"); } -static void ipath_bad_regread(struct ipath_devdata *dd) +static noinline void ipath_bad_regread(struct ipath_devdata *dd) { static int allbits; @@ -920,31 +926,9 @@ static void ipath_bad_regread(struct ipath_devdata *dd) } } -static void handle_port_pioavail(struct ipath_devdata *dd) -{ - u32 i; - /* - * start from port 1, since for now port 0 is never using - * wait_event for PIO - */ - for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (pd && pd->port_cnt && - dd->ipath_portpiowait & (1U << i)) { - clear_bit(i, &dd->ipath_portpiowait); - if (test_bit(IPATH_PORT_WAITING_PIO, - &pd->port_flag)) { - clear_bit(IPATH_PORT_WAITING_PIO, - &pd->port_flag); - wake_up_interruptible(&pd->port_wait); - } - } - } -} - static void handle_layer_pioavail(struct ipath_devdata *dd) { + unsigned long flags; int ret; ret = ipath_ib_piobufavail(dd->verbs_dev); @@ -953,9 +937,12 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) return; set: - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /* @@ -969,7 +956,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) int i; int rcvdint = 0; - /* test_bit below needs this... */ + /* + * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and + * test_and_clear_bit(IPATH_PORT_WAITING_URG) below + * would both like timely updates of the bits so that + * we don't pass them by unnecessarily. the rmb() + * here ensures that we see them promptly -- the + * corresponding wmb()'s are in ipath_poll_urgent() + * and ipath_poll_next()... + */ rmb(); portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & dd->ipath_i_rcvavail_mask) @@ -980,7 +975,7 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) if (portr & (1 << i) && pd && pd->port_cnt) { if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + clear_bit(i + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl); wake_up_interruptible(&pd->port_wait); rcvdint = 1; @@ -1039,7 +1034,7 @@ irqreturn_t ipath_intr(int irq, void *data) goto bail; } - istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); + istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus); if (unlikely(!istat)) { ipath_stats.sps_nullintr++; @@ -1180,7 +1175,7 @@ irqreturn_t ipath_intr(int irq, void *data) * for receive are at the bottom. */ if (chk0rcv) { - ipath_kreceive(dd); + ipath_kreceive(dd->ipath_pd[0]); istat &= ~port0rbits; } @@ -1191,12 +1186,14 @@ irqreturn_t ipath_intr(int irq, void *data) handle_urcv(dd, istat); if (istat & INFINIPATH_I_SPIOBUFAVAIL) { - clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - - if (dd->ipath_portpiowait) - handle_port_pioavail(dd); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); handle_layer_pioavail(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 8786dd7922e..4cc0f95ea87 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/dma-mapping.h> +#include <linux/mutex.h> #include <asm/io.h> #include <rdma/ib_verbs.h> @@ -140,6 +141,11 @@ struct ipath_portdata { u32 port_pionowait; /* total number of rcvhdrqfull errors */ u32 port_hdrqfull; + /* + * Used to suppress multiple instances of same + * port staying stuck at same point. + */ + u32 port_lastrcvhdrqtail; /* saved total number of rcvhdrqfull errors for poll edge trigger */ u32 port_hdrqfull_poll; /* total number of polled urgent packets */ @@ -148,6 +154,7 @@ struct ipath_portdata { u32 port_urgent_poll; /* pid of process using this port */ pid_t port_pid; + pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; /* same size as task_struct .comm[] */ char port_comm[16]; /* pkeys set by this use of this port */ @@ -166,6 +173,8 @@ struct ipath_portdata { u32 active_slaves; /* Type of packets or conditions we want to poll for */ u16 poll_type; + /* port rcvhdrq head offset */ + u32 port_head; }; struct sk_buff; @@ -182,6 +191,22 @@ struct ipath_skbinfo { dma_addr_t phys; }; +/* + * Possible IB config parameters for ipath_f_get/set_ib_cfg() + */ +#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */ +#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */ +#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */ +#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */ +#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */ +#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */ +#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */ +#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */ +#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */ +#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */ +#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */ + + struct ipath_devdata { struct list_head ipath_list; @@ -222,6 +247,8 @@ struct ipath_devdata { struct _ipath_layer ipath_layer; /* setup intr */ int (*ipath_f_intrsetup)(struct ipath_devdata *); + /* fallback to alternate interrupt type if possible */ + int (*ipath_f_intr_fallback)(struct ipath_devdata *); /* setup on-chip bus config */ int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); /* hard reset chip */ @@ -244,6 +271,18 @@ struct ipath_devdata { int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); /* free irq */ void (*ipath_f_free_irq)(struct ipath_devdata *); + struct ipath_message_header *(*ipath_f_get_msgheader) + (struct ipath_devdata *, __le32 *); + void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); + int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int); + int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32); + void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16); + void (*ipath_f_read_counters)(struct ipath_devdata *, + struct infinipath_counters *); + void (*ipath_f_xgxs_reset)(struct ipath_devdata *); + /* per chip actions needed for IB Link up/down changes */ + int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64); + struct ipath_ibdev *verbs_dev; struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ @@ -313,22 +352,12 @@ struct ipath_devdata { * supports, less gives more pio bufs/port, etc. */ u32 ipath_cfgports; - /* port0 rcvhdrq head offset */ - u32 ipath_port0head; /* count of port 0 hdrqfull errors */ u32 ipath_p0_hdrqfull; + /* port 0 number of receive eager buffers */ + u32 ipath_p0_rcvegrcnt; /* - * (*cfgports) used to suppress multiple instances of same - * port staying stuck at same point - */ - u32 *ipath_lastrcvhdrqtails; - /* - * (*cfgports) used to suppress multiple instances of same - * port staying stuck at same point - */ - u32 *ipath_lastegrheads; - /* * index of last piobuffer we used. Speeds up searching, by * starting at this point. Doesn't matter if multiple cpu's use and * update, last updater is only write that matters. Whenever it @@ -367,14 +396,15 @@ struct ipath_devdata { unsigned long ipath_wc_len; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; - /* shadow copy of all exptids physaddr; used only by funcsim */ - u64 *ipath_tidsimshadow; /* shadow copy of struct page *'s for exp tid pages */ struct page **ipath_pageshadow; /* shadow copy of dma handles for exp tid pages */ dma_addr_t *ipath_physshadow; - /* lock to workaround chip bug 9437 */ + u64 __iomem *ipath_egrtidbase; + /* lock to workaround chip bug 9437 and others */ + spinlock_t ipath_kernel_tid_lock; spinlock_t ipath_tid_lock; + spinlock_t ipath_sendctrl_lock; /* * IPATH_STATUS_*, @@ -395,6 +425,8 @@ struct ipath_devdata { void *ipath_dummy_hdrq; /* used after port close */ dma_addr_t ipath_dummy_hdrq_phys; + unsigned long ipath_ureg_align; /* user register alignment */ + /* * Shadow copies of registers; size indicates read access size. * Most of them are readonly, but some are write-only register, @@ -456,8 +488,6 @@ struct ipath_devdata { unsigned long ipath_rcvctrl; /* shadow kr_sendctrl */ unsigned long ipath_sendctrl; - /* ports waiting for PIOavail intr */ - unsigned long ipath_portpiowait; unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ /* value we put in kr_rcvhdrcnt */ @@ -550,12 +580,26 @@ struct ipath_devdata { u8 ipath_minrev; /* board rev, from ipath_revision */ u8 ipath_boardrev; + + u8 ipath_r_portenable_shift; + u8 ipath_r_intravail_shift; + u8 ipath_r_tailupd_shift; + u8 ipath_r_portcfg_shift; + /* unit # of this chip, if present */ int ipath_unit; /* saved for restore after reset */ u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + /* link width supported */ + u8 ipath_link_width_supported; + /* link speed supported */ + u8 ipath_link_speed_supported; + u8 ipath_link_width_enabled; + u8 ipath_link_speed_enabled; + u8 ipath_link_width_active; + u8 ipath_link_speed_active; /* Rx Polarity inversion (compensate for ~tx on partner) */ u8 ipath_rx_pol_inv; @@ -590,6 +634,8 @@ struct ipath_devdata { */ u32 ipath_i_rcvavail_mask; u32 ipath_i_rcvurg_mask; + u16 ipath_i_rcvurg_shift; + u16 ipath_i_rcvavail_shift; /* * Register bits for selecting i2c direction and values, used for @@ -603,6 +649,29 @@ struct ipath_devdata { /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ spinlock_t ipath_gpio_lock; + /* + * IB link and linktraining states and masks that vary per chip in + * some way. Set at init, to avoid each IB status change interrupt + */ + u8 ibcs_ls_shift; + u8 ibcs_lts_mask; + u32 ibcs_mask; + u32 ib_init; + u32 ib_arm; + u32 ib_active; + + u16 ipath_rhf_offset; /* offset of RHF within receive header entry */ + + /* + * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol + * reg. Changes for IBA7220 + */ + u8 ibcc_lic_mask; /* LinkInitCmd */ + u8 ibcc_lc_shift; /* LinkCmd */ + u8 ibcc_mpl_shift; /* Maxpktlen */ + + u8 delay_mult; + /* used to override LED behavior */ u8 ipath_led_override; /* Substituted for normal value, if non-zero */ u16 ipath_led_override_timeoff; /* delta to next timer event */ @@ -616,7 +685,7 @@ struct ipath_devdata { /* control access to actual counters, timer */ spinlock_t ipath_eep_st_lock; /* control high-level access to EEPROM */ - struct semaphore ipath_eep_sem; + struct mutex ipath_eep_lock; /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ uint64_t ipath_traffic_wds; /* active time is kept in seconds, but logged in hours */ @@ -630,6 +699,10 @@ struct ipath_devdata { * each of the counters to increment. */ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; + + /* interrupt mitigation reload register info */ + u16 ipath_jint_idle_ticks; /* idle clock ticks */ + u16 ipath_jint_max_packets; /* max packets across all ports */ }; /* Private data for file operations */ @@ -690,7 +763,7 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); int ipath_parse_ushort(const char *str, unsigned short *valp); -void ipath_kreceive(struct ipath_devdata *); +void ipath_kreceive(struct ipath_portdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); @@ -698,6 +771,8 @@ int ipath_set_linkstate(struct ipath_devdata *, u8); int ipath_set_mtu(struct ipath_devdata *, u16); int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); +void ipath_enable_armlaunch(struct ipath_devdata *); +void ipath_disable_armlaunch(struct ipath_devdata *); /* for use in system calls, where we want to know device type, etc. */ #define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd @@ -744,9 +819,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); * are 64bit */ #define IPATH_32BITCOUNTERS 0x20000 /* can miss port0 rx interrupts */ + /* Interrupt register is 64 bits */ +#define IPATH_INTREG_64 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ /* Use GPIO interrupts for new counters */ #define IPATH_GPIO_ERRINTRS 0x100000 +#define IPATH_SWAP_PIOBUFS 0x200000 + /* Suppress heartbeat, even if turning off loopback */ +#define IPATH_NO_HRTBT 0x1000000 +#define IPATH_HAS_MULT_IB_SPEED 0x8000000 /* Bits in GPIO for the added interrupts */ #define IPATH_GPIO_PORT0_BIT 2 @@ -758,8 +839,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* portdata flag bit offsets */ /* waiting for a packet to arrive */ #define IPATH_PORT_WAITING_RCV 2 - /* waiting for a PIO buffer to be available */ -#define IPATH_PORT_WAITING_PIO 3 /* master has not finished initializing */ #define IPATH_PORT_MASTER_UNINIT 4 /* waiting for an urgent packet to arrive */ @@ -767,8 +846,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); -int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); -int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); @@ -792,33 +869,6 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); */ #define IPATH_DFLT_RCVHDRSIZE 9 -#define IPATH_MDIO_CMD_WRITE 1 -#define IPATH_MDIO_CMD_READ 2 -#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 Mhz mdio clock */ -#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */ -#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */ -#define IPATH_MDIO_CTRL_STD 0x0 - -static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) -{ - return (((u64) IPATH_MDIO_CLD_DIV) << 32) | - (cmd << 26) | - (dev << 21) | - (reg << 16) | - (data & 0xFFFF); -} - - /* signal and fifo status, in bank 31 */ -#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8 - /* controls loopback, redundancy */ -#define IPATH_MDIO_CTRL_8355_REG_1 0x10 - /* premph, encdec, etc. */ -#define IPATH_MDIO_CTRL_8355_REG_2 0x11 - /* Kchars, etc. */ -#define IPATH_MDIO_CTRL_8355_REG_6 0x15 -#define IPATH_MDIO_CTRL_8355_REG_9 0x18 -#define IPATH_MDIO_CTRL_8355_REG_10 0x1D - int ipath_get_user_pages(unsigned long, size_t, struct page **); void ipath_release_user_pages(struct page **, size_t); void ipath_release_user_pages_on_close(struct page **, size_t); @@ -863,7 +913,7 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, return readl(regno + (u64 __iomem *) (dd->ipath_uregbase + (char __iomem *)dd->ipath_kregbase + - dd->ipath_palign * port)); + dd->ipath_ureg_align * port)); } /** @@ -880,7 +930,7 @@ static inline void ipath_write_ureg(const struct ipath_devdata *dd, { u64 __iomem *ubase = (u64 __iomem *) (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + - dd->ipath_palign * port); + dd->ipath_ureg_align * port); if (dd->ipath_kregbase) writeq(value, &ubase[regno]); } @@ -930,6 +980,53 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, (char __iomem *)dd->ipath_kregbase)); } +static inline void ipath_write_creg(const struct ipath_devdata *dd, + ipath_creg regno, u64 value) +{ + if (dd->ipath_kregbase) + writeq(value, regno + (u64 __iomem *) + (dd->ipath_cregbase + + (char __iomem *)dd->ipath_kregbase)); +} + +static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd) +{ + *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL; +} + +static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd) +{ + return (u32) le64_to_cpu(*((volatile __le64 *) + pd->port_rcvhdrtail_kvaddr)); +} + +static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r) +{ + return (dd->ipath_flags & IPATH_INTREG_64) ? + ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r); +} + +/* + * from contents of IBCStatus (or a saved copy), return linkstate + * Report ACTIVE_DEFER as ACTIVE, because we treat them the same + * everywhere, anyway (and should be, for almost all purposes). + */ +static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs) +{ + u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) & + INFINIPATH_IBCS_LINKSTATE_MASK; + if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER) + state = INFINIPATH_IBCS_L_STATE_ACTIVE; + return state; +} + +/* from contents of IBCStatus (or a saved copy), return linktrainingstate */ +static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs) +{ + return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + dd->ibcs_lts_mask; +} + /* * sysfs interface. */ @@ -938,8 +1035,7 @@ struct device_driver; extern const char ib_ipath_version[]; -int ipath_driver_create_group(struct device_driver *); -void ipath_driver_remove_group(struct device_driver *); +extern struct attribute_group *ipath_driver_attr_groups[]; int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 85a4aefc6c0..8f32b17a5ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -128,9 +128,8 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, int ret; /* - * We use LKEY == zero to mean a physical kmalloc() address. - * This is a bit of a hack since we rely on dma_map_single() - * being reversible by calling bus_to_virt(). + * We use LKEY == zero for kernel virtual addresses + * (see ipath_get_dma_mr and ipath_dma.c). */ if (sge->lkey == 0) { struct ipath_pd *pd = to_ipd(qp->ibqp.pd); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 3d1432d1e3f..d98d5f10370 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -934,6 +934,7 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, struct ib_pma_portsamplescontrol *p = (struct ib_pma_portsamplescontrol *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; unsigned long flags; u8 port_select = p->port_select; @@ -955,7 +956,10 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, p->counter_width = 4; /* 32 bit counters */ p->counter_mask0_9 = COUNTER_MASK0_9; spin_lock_irqsave(&dev->pending_lock, flags); - p->sample_status = dev->pma_sample_status; + if (crp->cr_psstat) + p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + p->sample_status = dev->pma_sample_status; p->sample_start = cpu_to_be32(dev->pma_sample_start); p->sample_interval = cpu_to_be32(dev->pma_sample_interval); p->tag = cpu_to_be16(dev->pma_tag); @@ -975,8 +979,9 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, struct ib_pma_portsamplescontrol *p = (struct ib_pma_portsamplescontrol *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; unsigned long flags; - u32 start; + u8 status; int ret; if (pmp->attr_mod != 0 || @@ -986,59 +991,67 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, goto bail; } - start = be32_to_cpu(p->sample_start); - if (start != 0) { - spin_lock_irqsave(&dev->pending_lock, flags); - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) { - dev->pma_sample_status = - IB_PMA_SAMPLE_STATUS_STARTED; - dev->pma_sample_start = start; - dev->pma_sample_interval = - be32_to_cpu(p->sample_interval); - dev->pma_tag = be16_to_cpu(p->tag); - if (p->counter_select[0]) - dev->pma_counter_select[0] = - p->counter_select[0]; - if (p->counter_select[1]) - dev->pma_counter_select[1] = - p->counter_select[1]; - if (p->counter_select[2]) - dev->pma_counter_select[2] = - p->counter_select[2]; - if (p->counter_select[3]) - dev->pma_counter_select[3] = - p->counter_select[3]; - if (p->counter_select[4]) - dev->pma_counter_select[4] = - p->counter_select[4]; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->pending_lock, flags); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + dev->pma_sample_start = be32_to_cpu(p->sample_start); + dev->pma_sample_interval = be32_to_cpu(p->sample_interval); + dev->pma_tag = be16_to_cpu(p->tag); + dev->pma_counter_select[0] = p->counter_select[0]; + dev->pma_counter_select[1] = p->counter_select[1]; + dev->pma_counter_select[2] = p->counter_select[2]; + dev->pma_counter_select[3] = p->counter_select[3]; + dev->pma_counter_select[4] = p->counter_select[4]; + if (crp->cr_psstat) { + ipath_write_creg(dev->dd, crp->cr_psinterval, + dev->pma_sample_interval); + ipath_write_creg(dev->dd, crp->cr_psstart, + dev->pma_sample_start); + } else + dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; } + spin_unlock_irqrestore(&dev->pending_lock, flags); + ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); bail: return ret; } -static u64 get_counter(struct ipath_ibdev *dev, __be16 sel) +static u64 get_counter(struct ipath_ibdev *dev, + struct ipath_cregs const *crp, + __be16 sel) { u64 ret; switch (sel) { case IB_PMA_PORT_XMIT_DATA: - ret = dev->ipath_sword; + ret = (crp->cr_psxmitdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) : + dev->ipath_sword; break; case IB_PMA_PORT_RCV_DATA: - ret = dev->ipath_rword; + ret = (crp->cr_psrcvdatacount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) : + dev->ipath_rword; break; case IB_PMA_PORT_XMIT_PKTS: - ret = dev->ipath_spkts; + ret = (crp->cr_psxmitpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) : + dev->ipath_spkts; break; case IB_PMA_PORT_RCV_PKTS: - ret = dev->ipath_rpkts; + ret = (crp->cr_psrcvpktscount) ? + ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) : + dev->ipath_rpkts; break; case IB_PMA_PORT_XMIT_WAIT: - ret = dev->ipath_xmit_wait; + ret = (crp->cr_psxmitwaitcount) ? + ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) : + dev->ipath_xmit_wait; break; default: ret = 0; @@ -1053,14 +1066,21 @@ static int recv_pma_get_portsamplesresult(struct ib_perf *pmp, struct ib_pma_portsamplesresult *p = (struct ib_pma_portsamplesresult *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; int i; memset(pmp->data, 0, sizeof(pmp->data)); p->tag = cpu_to_be16(dev->pma_tag); - p->sample_status = cpu_to_be16(dev->pma_sample_status); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = cpu_to_be32( - get_counter(dev, dev->pma_counter_select[i])); + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be32( + get_counter(dev, crp, dev->pma_counter_select[i])); return reply((struct ib_smp *) pmp); } @@ -1071,16 +1091,23 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, struct ib_pma_portsamplesresult_ext *p = (struct ib_pma_portsamplesresult_ext *)pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); + struct ipath_cregs const *crp = dev->dd->ipath_cregs; + u8 status; int i; memset(pmp->data, 0, sizeof(pmp->data)); p->tag = cpu_to_be16(dev->pma_tag); - p->sample_status = cpu_to_be16(dev->pma_sample_status); + if (crp->cr_psstat) + status = ipath_read_creg32(dev->dd, crp->cr_psstat); + else + status = dev->pma_sample_status; + p->sample_status = cpu_to_be16(status); /* 64 bits */ p->extended_width = __constant_cpu_to_be32(0x80000000); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = cpu_to_be64( - get_counter(dev, dev->pma_counter_select[i])); + p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : + cpu_to_be64( + get_counter(dev, crp, dev->pma_counter_select[i])); return reply((struct ib_smp *) pmp); } @@ -1113,6 +1140,8 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, dev->z_local_link_integrity_errors; cntrs.excessive_buffer_overrun_errors -= dev->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= dev->z_vl15_dropped; + cntrs.vl15_dropped += dev->n_vl15_dropped; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1156,10 +1185,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, cntrs.excessive_buffer_overrun_errors = 0xFUL; p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; - if (dev->n_vl15_dropped > 0xFFFFUL) + if (cntrs.vl15_dropped > 0xFFFFUL) p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); else - p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped); + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); else @@ -1262,8 +1291,10 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, dev->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; - if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { dev->n_vl15_dropped = 0; + dev->z_vl15_dropped = cntrs.vl15_dropped; + } if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) dev->z_port_xmit_data = cntrs.port_xmit_data; @@ -1434,7 +1465,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_FAILURE; + ret = IB_MAD_RESULT_SUCCESS; goto bail; default: smp->status |= IB_SMP_UNSUP_METHOD; @@ -1516,7 +1547,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_FAILURE; + ret = IB_MAD_RESULT_SUCCESS; goto bail; default: pmp->status |= IB_SMP_UNSUP_METHOD; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6a41fdbc8e5..80dc623cee4 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -387,8 +387,8 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ib_wc wc; int ret = 0; - ipath_dbg("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + ipath_dbg("QP%d/%d in error state (%d)\n", + qp->ibqp.qp_num, qp->remote_qpn, err); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ @@ -835,7 +835,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->qp_type); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + vfree(qp->r_rq.wq); + goto bail_qp; } qp->ip = NULL; ipath_reset_qp(qp); @@ -854,8 +855,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - int err; - if (!qp->r_rq.wq) { __u64 offset = 0; @@ -863,7 +862,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, sizeof(offset)); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + goto bail_ip; } } else { u32 s = sizeof(struct ipath_rwq) + @@ -875,7 +874,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); - goto bail_rwq; + goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), @@ -907,9 +906,11 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; bail_ip: - kfree(qp->ip); -bail_rwq: - vfree(qp->r_rq.wq); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); bail_qp: kfree(qp); bail_swq: diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5c29b2bfea1..459e46e2c01 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -647,6 +647,7 @@ static void send_rc_ack(struct ipath_qp *qp) queue_ack: spin_lock_irqsave(&qp->s_lock, flags); + dev->n_rc_qacks++; qp->s_flags |= IPATH_S_ACK_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; @@ -798,11 +799,13 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); + if (qp->s_last_psn != psn) { + qp->s_last_psn = psn; + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } } - qp->s_last_psn = psn; } /** @@ -959,8 +962,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, /* If this is a partial ACK, reset the retransmit timer. */ if (qp->s_last != qp->s_tail) { spin_lock(&dev->pending_lock); - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); /* * If we get a partial ACK for a resent operation, @@ -1652,13 +1656,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_FIRST): if (!ipath_get_rwqe(qp, 0)) { rnr_nak: - /* - * A RNR NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read or atomic - * is pending though. - */ - if (qp->r_nak_state) - goto done; qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; qp->r_ack_psn = qp->r_psn; goto send_ack; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 708eba3165d..6d2a17f9c1d 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -82,8 +82,7 @@ /* kr_rcvctrl bits */ #define INFINIPATH_R_PORTENABLE_SHIFT 0 -#define INFINIPATH_R_INTRAVAIL_SHIFT 16 -#define INFINIPATH_R_TAILUPD 0x80000000 +#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38) /* kr_intstatus, kr_intclear, kr_intmask bits */ #define INFINIPATH_I_RCVURG_SHIFT 0 @@ -272,20 +271,6 @@ #define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL #define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL -/* kr_mdio bits */ -#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL -#define INFINIPATH_MDIO_CLKDIV_SHIFT 32 -#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL -#define INFINIPATH_MDIO_COMMAND_SHIFT 26 -#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL -#define INFINIPATH_MDIO_DEVADDR_SHIFT 21 -#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL -#define INFINIPATH_MDIO_REGADDR_SHIFT 16 -#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL -#define INFINIPATH_MDIO_DATA_SHIFT 0 -#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL -#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL - /* kr_partitionkey bits */ #define INFINIPATH_PKEY_SIZE 16 #define INFINIPATH_PKEY_MASK 0xFFFF @@ -303,8 +288,6 @@ /* kr_xgxsconfig bits */ #define INFINIPATH_XGXS_RESET 0x7ULL -#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL -#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 #define INFINIPATH_XGXS_RX_POL_SHIFT 19 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL @@ -470,6 +453,20 @@ struct ipath_cregs { ipath_creg cr_unsupvlcnt; ipath_creg cr_wordrcvcnt; ipath_creg cr_wordsendcnt; + ipath_creg cr_vl15droppedpktcnt; + ipath_creg cr_rxotherlocalphyerrcnt; + ipath_creg cr_excessbufferovflcnt; + ipath_creg cr_locallinkintegrityerrcnt; + ipath_creg cr_rxvlerrcnt; + ipath_creg cr_rxdlidfltrcnt; + ipath_creg cr_psstat; + ipath_creg cr_psstart; + ipath_creg cr_psinterval; + ipath_creg cr_psrcvdatacount; + ipath_creg cr_psrcvpktscount; + ipath_creg cr_psxmitdatacount; + ipath_creg cr_psxmitpktscount; + ipath_creg cr_psxmitwaitcount; }; #endif /* _IPATH_REGISTERS_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 4b6b7ee8e5c..a59bdbd0ed8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -98,11 +98,15 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { qp->s_rnr_timeout -= nqp->s_rnr_timeout; l = l->next; - if (l->next == &dev->rnrwait) + if (l->next == &dev->rnrwait) { + nqp = NULL; break; + } nqp = list_entry(l->next, struct ipath_qp, timerwait); } + if (nqp) + nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } spin_unlock_irqrestore(&dev->pending_lock, flags); @@ -479,9 +483,14 @@ done: static void want_buffer(struct ipath_devdata *dd) { - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } /** @@ -630,11 +639,8 @@ bail:; void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - u32 last = qp->s_last; - - if (++last == qp->s_size) - last = 0; - qp->s_last = last; + unsigned long flags; + u32 last; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -658,4 +664,11 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, wc.port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); } + + spin_lock_irqsave(&qp->s_lock, flags); + last = qp->s_last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 40c36ec1901..f772102e471 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -59,7 +59,7 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } @@ -94,8 +94,8 @@ bail: /** * ipath_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create - * @attr: the attributes of the SRQ - * @udata: not used by the InfiniPath verbs driver + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libipathverbs when creating a user SRQ */ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, @@ -211,11 +211,11 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { struct ipath_rwq *owq; - struct ipath_rwq *wq; struct ipath_rwqe *p; u32 sz, size, n, head, tail; @@ -236,27 +236,20 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; - __u64 offset = (__u64) wq; + __u64 offset = 0; ret = ib_copy_from_udata(&offset_addr, udata, sizeof(offset_addr)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; udata->outbuf = (void __user *) offset_addr; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; } spin_lock_irq(&srq->rq.lock); @@ -277,10 +270,8 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, else n -= tail; if (size <= n) { - spin_unlock_irq(&srq->rq.lock); - vfree(wq); ret = -EINVAL; - goto bail; + goto bail_unlock; } n = 0; p = wq->wq; @@ -314,6 +305,18 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, u32 s = sizeof(struct ipath_rwq) + size * sz; ipath_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, @@ -328,7 +331,12 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); } + goto bail; +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index f0271415cd5..d2725cd11bd 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -133,15 +133,16 @@ bail: static void ipath_qcheck(struct ipath_devdata *dd) { static u64 last_tot_hdrqfull; + struct ipath_portdata *pd = dd->ipath_pd[0]; size_t blen = 0; char buf[128]; *buf = 0; - if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) { + if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", - dd->ipath_pd[0]->port_hdrqfull - + pd->port_hdrqfull - dd->ipath_p0_hdrqfull); - dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull; + dd->ipath_p0_hdrqfull = pd->port_hdrqfull; } if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { blen += snprintf(buf + blen, sizeof buf - blen, @@ -173,7 +174,7 @@ static void ipath_qcheck(struct ipath_devdata *dd) if (blen) ipath_dbg("%s\n", buf); - if (dd->ipath_port0head != (u32) + if (pd->port_head != (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) { if (dd->ipath_lastport0rcv_cnt == ipath_stats.sps_port0pkts) { @@ -181,7 +182,7 @@ static void ipath_qcheck(struct ipath_devdata *dd) "port0 hd=%llx tl=%x; port0pkts %llx\n", (unsigned long long) le64_to_cpu(*dd->ipath_hdrqtailptr), - dd->ipath_port0head, + pd->port_head, (unsigned long long) ipath_stats.sps_port0pkts); } @@ -237,7 +238,7 @@ static void ipath_chk_errormask(struct ipath_devdata *dd) void ipath_get_faststats(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - u32 val; + int i; static unsigned cnt; unsigned long flags; u64 traffic_wds; @@ -321,12 +322,11 @@ void ipath_get_faststats(unsigned long opaque) /* limit qfull messages to ~one per minute per port */ if ((++cnt & 0x10)) { - for (val = dd->ipath_cfgports - 1; ((int)val) >= 0; - val--) { - if (dd->ipath_lastegrheads[val] != -1) - dd->ipath_lastegrheads[val] = -1; - if (dd->ipath_lastrcvhdrqtails[val] != -1) - dd->ipath_lastrcvhdrqtails[val] = -1; + for (i = (int) dd->ipath_cfgports; --i >= 0; ) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (pd && pd->port_lastrcvhdrqtail != -1) + pd->port_lastrcvhdrqtail = -1; } } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index e1ad7cfc21f..56dfc8a2344 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -363,6 +363,60 @@ static ssize_t show_unit(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); } +static ssize_t show_jint_max_packets(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets); +} + +static ssize_t store_jint_max_packets(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_max_packets.\n"); + else + dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v); + + return ret; +} + +static ssize_t show_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks); +} + +static ssize_t store_jint_idle_ticks(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + u16 v = 0; + int ret; + + ret = ipath_parse_ushort(buf, &v); + if (ret < 0) + ipath_dev_err(dd, "invalid jint_idle_ticks.\n"); + else + dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets); + + return ret; +} + #define DEVICE_COUNTER(name, attr) \ static ssize_t show_counter_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -670,6 +724,257 @@ static ssize_t show_logged_errs(struct device *dev, return count; } +/* + * New sysfs entries to control various IB config. These all turn into + * accesses via ipath_f_get/set_ib_cfg. + * + * Get/Set heartbeat enable. Or of 1=enabled, 2=auto + */ +static ssize_t show_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_hrtbt_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 3) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + goto bail; + } + + /* + * Set the "intentional" heartbeat enable per either of + * "Enable" and "Auto", as these are normally set together. + * This bit is consulted when leaving loopback mode, + * because entering loopback mode overrides it and automatically + * disables heartbeat. + */ + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val); + if (r < 0) + ret = r; + else if (val == IPATH_IB_HRTBT_OFF) + dd->ipath_flags |= IPATH_NO_HRTBT; + else + dd->ipath_flags &= ~IPATH_NO_HRTBT; + +bail: + return ret; +} + +/* + * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric, + * _not_ the particular encoding of any given chip) + */ +static ssize_t show_lwid_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lwid_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > 3)) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Width (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link width */ +static ssize_t show_lwid(struct device *dev, + struct device_attribute *attr, + char *buf) + +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR. + */ +static ssize_t show_spd_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_spd_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR))) + ret = -EINVAL; + if (ret < 0) { + ipath_dev_err(dd, + "attempt to set invalid Link Speed (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + +/* Get current link speed */ +static ssize_t show_spd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +/* + * Get/Set RX polarity-invert enable. 0=no, 1=yes. + */ +static ssize_t show_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_rx_polinv_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0 || val > 1) + goto invalid; + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity (enable)\n"); +bail: + return ret; +} +/* + * Get/Set RX lane-reversal enable. 0=no, 1=yes. + */ +static ssize_t show_lanerev_enb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + + ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB); + if (ret >= 0) + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_lanerev_enb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret >= 0 && val > 1) { + ret = -EINVAL; + ipath_dev_err(dd, + "attempt to set invalid Lane reversal (enable)\n"); + goto bail; + } + + r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val); + if (r < 0) + ret = r; + +bail: + return ret; +} + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -683,6 +988,11 @@ static struct attribute_group driver_attr_group = { .attrs = driver_attributes }; +struct attribute_group *ipath_driver_attr_groups[] = { + &driver_attr_group, + NULL, +}; + static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc); static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); @@ -701,6 +1011,10 @@ static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, + show_jint_max_packets, store_jint_max_packets); +static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, + show_jint_idle_ticks, store_jint_idle_ticks); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -727,6 +1041,34 @@ static struct attribute_group dev_attr_group = { .attrs = dev_attributes }; +static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, + store_hrtbt_enb); +static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb, + store_lwid_enb); +static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL); +static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb, + store_spd_enb); +static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL); +static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb, + store_rx_polinv_enb); +static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb, + store_lanerev_enb); + +static struct attribute *dev_ibcfg_attributes[] = { + &dev_attr_hrtbt_enable.attr, + &dev_attr_link_width_enable.attr, + &dev_attr_link_width.attr, + &dev_attr_link_speed_enable.attr, + &dev_attr_link_speed.attr, + &dev_attr_rx_pol_inv_enable.attr, + &dev_attr_rx_lane_rev_enable.attr, + NULL +}; + +static struct attribute_group dev_ibcfg_attr_group = { + .attrs = dev_ibcfg_attributes +}; + /** * ipath_expose_reset - create a device reset file * @dev: the device structure @@ -753,24 +1095,9 @@ int ipath_expose_reset(struct device *dev) return ret; } -int ipath_driver_create_group(struct device_driver *drv) -{ - int ret; - - ret = sysfs_create_group(&drv->kobj, &driver_attr_group); - - return ret; -} - -void ipath_driver_remove_group(struct device_driver *drv) -{ - sysfs_remove_group(&drv->kobj, &driver_attr_group); -} - int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) { int ret; - char unit[5]; ret = sysfs_create_group(&dev->kobj, &dev_attr_group); if (ret) @@ -780,11 +1107,26 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) if (ret) goto bail_attrs; - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit); - if (ret == 0) - goto bail; + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + ret = device_create_file(dev, &dev_attr_jint_idle_ticks); + if (ret) + goto bail_counter; + ret = device_create_file(dev, &dev_attr_jint_max_packets); + if (ret) + goto bail_idle; + ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group); + if (ret) + goto bail_max; + } + + return 0; + +bail_max: + device_remove_file(dev, &dev_attr_jint_max_packets); +bail_idle: + device_remove_file(dev, &dev_attr_jint_idle_ticks); +bail_counter: sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); bail_attrs: sysfs_remove_group(&dev->kobj, &dev_attr_group); @@ -794,12 +1136,14 @@ bail: void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) { - char unit[5]; + sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - sysfs_remove_link(&dev->driver->kobj, unit); + if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { + sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group); + device_remove_file(dev, &dev_attr_jint_idle_ticks); + device_remove_file(dev, &dev_attr_jint_max_packets); + } - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); sysfs_remove_group(&dev->kobj, &dev_attr_group); device_remove_file(dev, &dev_attr_reset); diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 16a2a938b52..de67eed08ed 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -301,8 +301,6 @@ int ipath_make_ud_req(struct ipath_qp *qp) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - qp->s_hdrwords++; qp->s_cur_size = wqe->length; qp->s_cur_sge = &qp->s_sge; qp->s_wqe = wqe; @@ -327,6 +325,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr = &qp->s_hdr.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + qp->s_hdrwords++; ohdr->u.ud.imm_data = wqe->wr.imm_data; bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; } else @@ -455,6 +454,28 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } } + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). + */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (qp->ibqp.qp_num > 1 && + opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { + if (header_in_data) { + wc.imm_data = *(__be32 *) data; + data += sizeof(__be32); + } else + wc.imm_data = ohdr->u.ud.imm_data; + wc.wc_flags = IB_WC_WITH_IMM; + hdrsize += sizeof(u32); + } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { + wc.imm_data = 0; + wc.wc_flags = 0; + } else { + dev->n_pkt_drops++; + goto bail; + } + /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; if (unlikely(tlen < (hdrsize + pad + 4))) { @@ -482,28 +503,6 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.byte_len = tlen + sizeof(struct ib_grh); /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (qp->ibqp.qp_num > 1 && - opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - if (header_in_data) { - wc.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else - wc.imm_data = ohdr->u.ud.imm_data; - wc.wc_flags = IB_WC_WITH_IMM; - hdrsize += sizeof(u32); - } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; - wc.wc_flags = 0; - } else { - dev->n_pkt_drops++; - goto bail; - } - - /* * Get the next work request entry to find where to put the data. */ if (qp->r_reuse_sge) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 74f77e7c2c1..32d8f882e56 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -302,8 +302,10 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - goto bail_inval; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; @@ -404,7 +406,7 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } @@ -941,7 +943,7 @@ bail: * ipath_verbs_send - send a packet * @qp: the QP to send on * @hdr: the packet header - * @hdrwords: the number of words in the header + * @hdrwords: the number of 32-bit words in the header * @ss: the SGE to send * @len: the length of the packet in bytes */ @@ -953,7 +955,10 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, int ret; u32 dwords = (len + 3) >> 2; - /* +1 is for the qword padding of pbc */ + /* + * Calculate the send buffer trigger address. + * The +1 counts for the pbc control dword following the pbc length. + */ plen = hdrwords + dwords + 1; /* Drop non-VL15 packets if we are not in the active state */ @@ -1128,20 +1133,34 @@ static int ipath_query_device(struct ib_device *ibdev, return 0; } -const u8 ipath_cvt_physportstate[16] = { - [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3, - [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5, - [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2, - [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2, - [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1, - [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1, - [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4, - [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4, - [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6, - [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6, - [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, +const u8 ipath_cvt_physportstate[32] = { + [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) @@ -1166,8 +1185,9 @@ static int ipath_query_port(struct ib_device *ibdev, ibcstat = dd->ipath_lastibcstat; props->state = ((ibcstat >> 4) & 0x3) + 1; /* See phys_state_show() */ - props->phys_state = ipath_cvt_physportstate[ - dd->ipath_lastibcstat & 0xf]; + props->phys_state = /* MEA: assumes shift == 0 */ + ipath_cvt_physportstate[dd->ipath_lastibcstat & + dd->ibcs_lts_mask]; props->port_cap_flags = dev->port_cap_flags; props->gid_tbl_len = 1; props->max_msg_sz = 0x80000000; @@ -1639,6 +1659,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) cntrs.local_link_integrity_errors; idev->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; + idev->z_vl15_dropped = cntrs.vl15_dropped; /* * The system image GUID is supposed to be the same for all diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6ccb54f104a..3d59736b49b 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -554,6 +554,7 @@ struct ipath_ibdev { u32 z_pkey_violations; /* starting count for PMA */ u32 z_local_link_integrity_errors; /* starting count for PMA */ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ + u32 z_vl15_dropped; /* starting count for PMA */ u32 n_rc_resends; u32 n_rc_acks; u32 n_rc_qacks; @@ -598,6 +599,7 @@ struct ipath_verbs_counters { u64 port_rcv_packets; u32 local_link_integrity_errors; u32 excessive_buffer_overrun_errors; + u32 vl15_dropped; }; static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) @@ -830,7 +832,17 @@ unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; +/* + * Below converts HCA-specific LinkTrainingState to IB PhysPortState + * values. + */ extern const u8 ipath_cvt_physportstate[]; +#define IB_PHYSPORTSTATE_SLEEP 1 +#define IB_PHYSPORTSTATE_POLL 2 +#define IB_PHYSPORTSTATE_DISABLED 3 +#define IB_PHYSPORTSTATE_CFG_TRAIN 4 +#define IB_PHYSPORTSTATE_LINKUP 5 +#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 extern const int ib_ipath_state_ops[]; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 8bf44daf45e..7950aa6e818 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_srq *srq; int is_send; int is_error; + u32 g_mlpath_rqpn; u16 wqe_ctr; cqe = next_cqe_sw(cq); @@ -426,11 +427,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, wc->slid = be16_to_cpu(cqe->rlid); wc->sl = cqe->sl >> 4; - wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; - wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; - wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? - IB_WC_GRH : 0; - wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + wc->src_qp = g_mlpath_rqpn & 0xffffff; + wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; + wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; } return 0; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 6b3322486b5..8cba9c532e6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1282,7 +1282,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int size; int i; - spin_lock_irqsave(&qp->rq.lock, flags); + spin_lock_irqsave(&qp->sq.lock, flags); ind = qp->sq.head; @@ -1448,7 +1448,7 @@ out: (qp->sq.wqe_cnt - 1)); } - spin_unlock_irqrestore(&qp->rq.lock, flags); + spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 15aa32eb78b..7bbdd1f4e6c 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -60,13 +60,12 @@ enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, MTHCA_FLAG_SRQ = 1 << 2, - MTHCA_FLAG_MSI = 1 << 3, - MTHCA_FLAG_MSI_X = 1 << 4, - MTHCA_FLAG_NO_LAM = 1 << 5, - MTHCA_FLAG_FMR = 1 << 6, - MTHCA_FLAG_MEMFREE = 1 << 7, - MTHCA_FLAG_PCIE = 1 << 8, - MTHCA_FLAG_SINAI_OPT = 1 << 9 + MTHCA_FLAG_MSI_X = 1 << 3, + MTHCA_FLAG_NO_LAM = 1 << 4, + MTHCA_FLAG_FMR = 1 << 5, + MTHCA_FLAG_MEMFREE = 1 << 6, + MTHCA_FLAG_PCIE = 1 << 7, + MTHCA_FLAG_SINAI_OPT = 1 << 8 }; enum { diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index b29de51b7f3..b60eb5df96e 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -827,8 +827,7 @@ int mthca_init_eq_table(struct mthca_dev *dev) if (err) goto err_out_free; - if (dev->mthca_flags & MTHCA_FLAG_MSI || - dev->mthca_flags & MTHCA_FLAG_MSI_X) { + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { dev->eq_table.clr_mask = 0; } else { dev->eq_table.clr_mask = @@ -839,8 +838,7 @@ int mthca_init_eq_table(struct mthca_dev *dev) dev->eq_table.arm_mask = 0; - intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? - 128 : dev->eq_table.inta_pin; + intr = dev->eq_table.inta_pin; err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 60de6f93869..5cf8250d4e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -65,14 +65,9 @@ static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); -static int msi = 0; -module_param(msi, int, 0444); -MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)"); - #else /* CONFIG_PCI_MSI */ #define msi_x (0) -#define msi (0) #endif /* CONFIG_PCI_MSI */ @@ -816,13 +811,11 @@ static int mthca_setup_hca(struct mthca_dev *dev) err = mthca_NOP(dev, &status); if (err || status) { - if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) { + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { mthca_warn(dev, "NOP command failed to generate interrupt " "(IRQ %d).\n", - dev->mthca_flags & MTHCA_FLAG_MSI_X ? - dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector : - dev->pdev->irq); - mthca_warn(dev, "Trying again with MSI/MSI-X disabled.\n"); + dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector); + mthca_warn(dev, "Trying again with MSI-X disabled.\n"); } else { mthca_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", @@ -1005,7 +998,7 @@ static struct { .flags = 0 }, [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), @@ -1128,29 +1121,12 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; - else if (msi) { - static int warned; - - if (!warned) { - printk(KERN_WARNING PFX "WARNING: MSI support will be " - "removed from the ib_mthca driver in January 2008.\n"); - printk(KERN_WARNING " If you are using MSI and cannot " - "switch to MSI-X, please tell " - "<general@lists.openfabrics.org>.\n"); - ++warned; - } - - if (!pci_enable_msi(pdev)) - mdev->mthca_flags |= MTHCA_FLAG_MSI; - } err = mthca_setup_hca(mdev); - if (err == -EBUSY && (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))) { + if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); - mdev->mthca_flags &= ~(MTHCA_FLAG_MSI_X | MTHCA_FLAG_MSI); + mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; err = mthca_setup_hca(mdev); } @@ -1192,8 +1168,6 @@ err_cleanup: err_close: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); mthca_close_hca(mdev); @@ -1246,8 +1220,6 @@ static void __mthca_remove_one(struct pci_dev *pdev) if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); - if (mdev->mthca_flags & MTHCA_FLAG_MSI) - pci_disable_msi(pdev); ib_dealloc_device(&mdev->ib_dev); mthca_release_regions(pdev, mdev->mthca_flags & |