diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-08-21 11:27:00 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-08-21 11:27:00 +0200 |
commit | bcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch) | |
tree | e420679a5db6ea4e1694eef57f9abb6acac8d4d3 /drivers/infiniband | |
parent | 26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff) | |
parent | 000078bc3ee69efb1124b8478c7527389a826074 (diff) |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Fix include order for bison/flex-generated C files, from Ben Hutchings
* Build fixes and documentation corrections from David Ahern
* Group parsing support, from Jiri Olsa
* UI/gtk refactorings and improvements from Namhyung Kim
* NULL deref fix for perf script, from Namhyung Kim
* Assorted cleanups from Robert Richter
* Let O= makes handle relative paths, from Steven Rostedt
* perf script python fixes, from Feng Tang.
* Improve 'perf lock' error message when the needed tracepoints
are not present, from David Ahern.
* Initial bash completion support, from Frederic Weisbecker
* Allow building without libelf, from Namhyung Kim.
* Support DWARF CFI based unwind to have callchains when %bp
based unwinding is not possible, from Jiri Olsa.
* Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF
section was the end goal, several fixes for code that handles all
architectures and cleanups are included, from Cody Schafer.
* Add a description for the JIT interface, from Andi Kleen.
* Assorted fixes for Documentation and build in 32 bit, from Robert Richter
* Add support for non-tracepoint events in perf script python, from Feng Tang
* Cache the libtraceevent event_format associated to each evsel early, so that we
avoid relookups, i.e. calling pevent_find_event repeatedly when processing
tracepoint events.
[ This is to reduce the surface contact with libtraceevents and make clear what
is that the perf tools needs from that lib: so far parsing the common and per
event fields. ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/infiniband')
49 files changed, 2661 insertions, 1042 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 6ef660c1332..28058ae33d3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -129,7 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) dev_put(dev); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { @@ -243,7 +243,7 @@ out: return ret; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c889aaef341..d67999f6e34 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3848,24 +3848,28 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); - if (ret) - return -ENOMEM; + if (ret) { + ret = -ENOMEM; + goto error1; + } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; - goto error1; + goto error2; } ret = ib_register_client(&cm_client); if (ret) - goto error2; + goto error3; return 0; -error2: +error3: destroy_workqueue(cm.wq); -error1: +error2: class_unregister(&cm_class); +error1: + idr_destroy(&cm.local_id_table); return ret; } diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7da9b210234..be068f47e47 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,18 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID cpu_to_be16(0x001A) - enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2e826f9702c..7172559ce0c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -99,6 +99,10 @@ struct rdma_bind_list { unsigned short port; }; +enum { + CMA_OPTION_AFONLY, +}; + /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. @@ -137,9 +141,11 @@ struct rdma_id_private { u32 qkey; u32 qp_num; pid_t owner; + u32 options; u8 srq; u8 tos; u8 reuseaddr; + u8 afonly; }; struct cma_multicast { @@ -1297,8 +1303,10 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = htonl(~0); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = htonl(~0); + } } break; case AF_INET6: @@ -1312,9 +1320,11 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } else { cma_set_ip_ver(cma_data, 6); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip6 = ip6_addr; - memset(&cma_mask->dst_addr.ip6, 0xFF, - sizeof cma_mask->dst_addr.ip6); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); + } } break; default: @@ -1499,7 +1509,7 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); - if (cma_any_addr(addr)) + if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); @@ -1573,6 +1583,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; + dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) @@ -2098,6 +2109,26 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) } EXPORT_SYMBOL(rdma_set_reuseaddr); +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { + id_priv->options |= (1 << CMA_OPTION_AFONLY); + id_priv->afonly = afonly; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); + static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { @@ -2187,22 +2218,24 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct hlist_node *node; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - if (cma_any_addr(addr) && !reuseaddr) - return -EADDRNOTAVAIL; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { if (id_priv == cur_id) continue; - if ((cur_id->state == RDMA_CM_LISTEN) || - !reuseaddr || !cur_id->reuseaddr) { - cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; - if (cma_any_addr(cur_addr)) - return -EADDRNOTAVAIL; + if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && + cur_id->reuseaddr) + continue; - if (!cma_addr_cmp(addr, cur_addr)) - return -EADDRINUSE; - } + cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; + if (id_priv->afonly && cur_id->afonly && + (addr->sa_family != cur_addr->sa_family)) + continue; + + if (cma_any_addr(addr) || cma_any_addr(cur_addr)) + return -EADDRNOTAVAIL; + + if (!cma_addr_cmp(addr, cur_addr)) + return -EADDRINUSE; } return 0; } @@ -2278,7 +2311,7 @@ static int cma_get_port(struct rdma_id_private *id_priv) static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 *sin6; if (addr->sa_family != AF_INET6) @@ -2371,6 +2404,14 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { + if (addr->sa_family == AF_INET) + id_priv->afonly = 1; +#if IS_ENABLED(CONFIG_IPV6) + else if (addr->sa_family == AF_INET6) + id_priv->afonly = init_net.ipv6.sysctl.bindv6only; +#endif + } ret = cma_get_port(id_priv); if (ret) goto err2; @@ -3023,10 +3064,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); - if (IS_ERR(mc->multicast.ib)) - return PTR_ERR(mc->multicast.ib); - - return 0; + return PTR_RET(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index e497dfbee43..3ae2bfd3101 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -108,12 +108,14 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, unsigned char *prev_tail; prev_tail = skb_tail_pointer(skb); - *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), - len, NLM_F_MULTI); + *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), + len, NLM_F_MULTI); + if (!*nlh) + goto out_nlmsg_trim; (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; - return NLMSG_DATA(*nlh); + return nlmsg_data(*nlh); -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, prev_tail); return NULL; } @@ -171,8 +173,11 @@ static void ibnl_rcv(struct sk_buff *skb) int __init ibnl_init(void) { - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv, - NULL, THIS_MODULE); + struct netlink_kernel_cfg cfg = { + .input = ibnl_rcv, + }; + + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg); if (!nls) { pr_warn("Failed to create netlink socket\n"); return -ENOMEM; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index fbbfa24cf57..a8905abc56e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -94,6 +94,12 @@ struct ib_sa_path_query { struct ib_sa_query sa_query; }; +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, + .offset_bits = 24, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res2), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 512 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -945,6 +979,105 @@ err1: return ret; } +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + + if (mad) { + struct ib_sa_guidinfo_rec rec; + + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_guidinfo_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; + + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8002ae642cf..6bf85042289 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -909,6 +909,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; + case RDMA_OPTION_ID_AFONLY: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); + break; default: ret = -ENOSYS; } @@ -995,23 +1002,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - optval = kmalloc(cmd.optlen, GFP_KERNEL); - if (!optval) { - ret = -ENOMEM; - goto out1; - } - - if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, - cmd.optlen)) { - ret = -EFAULT; - goto out2; + optval = memdup_user((void __user *) (unsigned long) cmd.optval, + cmd.optlen); + if (IS_ERR(optval)) { + ret = PTR_ERR(optval); + goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); -out2: kfree(optval); -out1: + +out: ucma_put_ctx(ctx); return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 740dcc065cf..77b6b182778 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1374,7 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL); + l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1942,7 +1942,8 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail3; } ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL); + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, + &cm_id->remote_addr.sin_addr.s_addr); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b18870c455a..51f42061dae 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -548,8 +548,8 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, } if (mpa_rev_to_use == 2) { - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -635,8 +635,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); @@ -715,8 +715,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); if (peer2peer && (ep->mpa_attr.p2p_type != diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 259b0670b51..c27141fef1a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -147,47 +147,51 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) } /* - * Snoop SM MADs for port info and P_Key table sets, so we can - * synthesize LID change and P_Key change events. + * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can + * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, - u16 prev_lid) + u16 prev_lid) { - struct ib_event event; + struct ib_port_info *pinfo; + u16 lid; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - mad->mad_hdr.method == IB_MGMT_METHOD_SET) { - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { - struct ib_port_info *pinfo = - (struct ib_port_info *) ((struct ib_smp *) mad)->data; - u16 lid = be16_to_cpu(pinfo->lid); + mad->mad_hdr.method == IB_MGMT_METHOD_SET) + switch (mad->mad_hdr.attr_id) { + case IB_SMP_ATTR_PORT_INFO: + pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + lid = be16_to_cpu(pinfo->lid); - update_sm_ah(to_mdev(ibdev), port_num, + update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); - event.device = ibdev; - event.element.port_num = port_num; + if (pinfo->clientrereg_resv_subnetto & 0x80) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_CLIENT_REREGISTER); - if (pinfo->clientrereg_resv_subnetto & 0x80) { - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } + if (prev_lid != lid) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_LID_CHANGE); + break; - if (prev_lid != lid) { - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - } + case IB_SMP_ATTR_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { - event.device = ibdev; - event.event = IB_EVENT_PKEY_CHANGE; - event.element.port_num = port_num; - ib_dispatch_event(&event); + case IB_SMP_ATTR_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); + break; + default: + break; } - } } static void node_desc_override(struct ib_device *dev, @@ -242,6 +246,25 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int err; struct ib_port_attr pattr; + if (in_wc && in_wc->qp->qp_num) { + pr_debug("received MAD: slid:%d sqpn:%d " + "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", + in_wc->slid, in_wc->src_qp, + in_wc->dlid_path_bits, + in_wc->qp->qp_num, + in_wc->wc_flags, + in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, + be16_to_cpu(in_mad->mad_hdr.attr_id)); + if (in_wc->wc_flags & IB_WC_GRH) { + pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->sgid.global.subnet_prefix), + be64_to_cpu(in_grh->sgid.global.interface_id)); + pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->dgid.global.subnet_prefix), + be64_to_cpu(in_grh->dgid.global.interface_id)); + } + } + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { @@ -286,7 +309,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } @@ -427,3 +451,64 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) ib_destroy_ah(dev->sm_ah[p]); } } + +void handle_port_mgmt_change_event(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *dev = ew->ib_dev; + struct mlx4_eqe *eqe = &(ew->ib_eqe); + u8 port = eqe->event.port_mgmt_change.port; + u32 changed_attr; + + switch (eqe->subtype) { + case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: + changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); + + /* Update the SM ah - This should be done before handling + the other changed attributes so that MADs can be sent to the SM */ + if (changed_attr & MSTR_SM_CHANGE_MASK) { + u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); + u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; + update_sm_ah(dev, port, lid, sl); + } + + /* Check if it is a lid change event */ + if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); + + /* Generate GUID changed event */ + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + + if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) + mlx4_ib_dispatch_event(dev, port, + IB_EVENT_CLIENT_REREGISTER); + break; + + case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + break; + case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + break; + default: + pr_warn("Unsupported subtype 0x%x for " + "Port Management Change event\n", eqe->subtype); + } + + kfree(ew); +} + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event event; + + event.device = &dev->ib_dev; + event.element.port_num = port_num; + event.event = type; + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3530c41fcd1..fe2088cfa6e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -50,7 +50,7 @@ #include "mlx4_ib.h" #include "user.h" -#define DRV_NAME "mlx4_ib" +#define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" #define DRV_RELDATE "April 4, 2008" @@ -157,7 +157,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->masked_atomic_cap = IB_ATOMIC_HCA; + props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; @@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, return ret; } +struct mlx4_ib_steering { + struct list_head list; + u64 reg_id; + union ib_gid gid; +}; + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + return -ENOMEM; + } - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, - !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6); + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROT_IB_IPV6, ®_id); if (err) - return err; + goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + ib_steering->reg_id = reg_id; + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + } return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); +err_malloc: + kfree(ib_steering); + return err; } @@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; + u64 reg_id = 0; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &mqp->steering_rules, list) { + if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { + list_del(&ib_steering->list); + break; + } + } + mutex_unlock(&mqp->mutex); + if (&ib_steering->list == &mqp->steering_rules) { + pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + reg_id = ib_steering->reg_id; + kfree(ib_steering); + } - err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); if (err) return err; @@ -898,7 +946,6 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; - struct ib_event event; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { @@ -916,10 +963,7 @@ static void update_gids_task(struct work_struct *work) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); - event.device = &gw->dev->ib_dev; - event.element.port_num = gw->port; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1111,7 +1155,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) sprintf(name, "mlx4-ib-%d-%d@%s", i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { + if (mlx4_assign_eq(dev, name, NULL, + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = @@ -1383,10 +1428,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + struct mlx4_eqe *eqe = NULL; + struct ib_event_work *ew; + int port = 0; + + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) + eqe = (struct mlx4_eqe *)param; + else + port = (u8)param; if (port > ibdev->num_ports) return; @@ -1405,6 +1458,19 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, ibev.event = IB_EVENT_DEVICE_FATAL; break; + case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: + ew = kmalloc(sizeof *ew, GFP_ATOMIC); + if (!ew) { + pr_err("failed to allocate memory for events work\n"); + break; + } + + INIT_WORK(&ew->work, handle_port_mgmt_change_event); + memcpy(&ew->ib_eqe, eqe, sizeof *eqe); + ew->ib_dev = ibdev; + handle_port_mgmt_change_event(&ew->work); + return; + default: return; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ff36655d23d..c136bb618e2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -44,6 +44,16 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +#define MLX4_IB_DRV_NAME "mlx4_ib" + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__ + +#define mlx4_ib_warn(ibdev, format, arg...) \ + dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) + enum { MLX4_IB_SQ_MIN_WQE_SHIFT = 6, MLX4_IB_MAX_HEADROOM = 2048 @@ -163,6 +173,7 @@ struct mlx4_ib_qp { u8 state; int mlx_type; struct list_head gid_list; + struct list_head steering_rules; }; struct mlx4_ib_srq { @@ -214,6 +225,12 @@ struct mlx4_ib_dev { int eq_added; }; +struct ib_event_work { + struct work_struct work; + struct mlx4_ib_dev *ib_dev; + struct mlx4_eqe ib_eqe; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -371,4 +388,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d4ed24aef9..a6d8ea060ea 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -1335,11 +1336,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + pr_debug("qpn 0x%x: invalid attribute mask specified " + "for transition %d to %d. qp_type %d," + " attr_mask 0x%x\n", + ibqp->qp_num, cur_state, new_state, + ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + pr_debug("qpn 0x%x: invalid port number (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->port_num, cur_state, + new_state, ibqp->qp_type); goto out; } @@ -1350,17 +1361,30 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { + pr_debug("qpn 0x%x: invalid pkey index (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->pkey_index, cur_state, + new_state, ibqp->qp_type); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " + "Transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->max_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " + "Transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9601049e14d..26a68453610 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -247,7 +247,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, spin_unlock(&dev->qp_table.lock); if (!qp) { - mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + mthca_warn(dev, "Async event %d for bogus QP %08x\n", + event_type, qpn); return; } @@ -501,6 +502,7 @@ done: qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = qp->sq_policy; out_mailbox: mthca_free_mailbox(dev, mailbox); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b050e629e9c..5a044526e4f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -202,8 +202,7 @@ static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev) return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) || \ -defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_VLAN_8021Q) static int ocrdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) @@ -549,7 +548,7 @@ static struct ocrdma_driver ocrdma_drv = { static void ocrdma_unregister_inet6addr_notifier(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier); #endif } @@ -558,7 +557,7 @@ static int __init ocrdma_init_module(void) { int status; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); if (status) return status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2e2e7aecc99..cb5b7f7d4d3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -97,7 +97,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; attr->max_srq = (dev->attr.max_qp - 1); - attr->max_srq_sge = attr->max_srq_sge; + attr->max_srq_sge = dev->attr.max_srq_sge; attr->max_srq_wr = dev->attr.max_rqe; attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; attr->max_fast_reg_page_list_len = 0; @@ -893,7 +893,9 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, /* verify consumer QPs are not trying to use GSI QP's CQ */ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq))) { + (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || + (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || + (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", __func__, dev->id); return -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 7e62f413714..7b1b8669002 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,8 +1,8 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -519,6 +519,7 @@ struct qib_pportdata { struct qib_devdata *dd; struct qib_chippport_specific *cpspec; /* chip-specific per-port */ struct kobject pport_kobj; + struct kobject pport_cc_kobj; struct kobject sl2vl_kobj; struct kobject diagc_kobj; @@ -544,6 +545,7 @@ struct qib_pportdata { /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct workqueue_struct *qib_wq; struct qib_sdma_state sdma_state; dma_addr_t sdma_descq_phys; volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ @@ -637,6 +639,39 @@ struct qib_pportdata { struct timer_list led_override_timer; struct xmit_wait cong_stats; struct timer_list symerr_clear_timer; + + /* Synchronize access between driver writes and sysfs reads */ + spinlock_t cc_shadow_lock + ____cacheline_aligned_in_smp; + + /* Shadow copy of the congestion control table */ + struct cc_table_shadow *ccti_entries_shadow; + + /* Shadow copy of the congestion control entries */ + struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow; + + /* List of congestion control table entries */ + struct ib_cc_table_entry_shadow *ccti_entries; + + /* 16 congestion entries with each entry corresponding to a SL */ + struct ib_cc_congestion_entry_shadow *congestion_entries; + + /* Maximum number of congestion control entries that the agent expects + * the manager to send. + */ + u16 cc_supported_table_entries; + + /* Total number of congestion control table entries */ + u16 total_cct_entry; + + /* Bit map identifying service level */ + u16 cc_sl_control_map; + + /* maximum congestion control table index */ + u16 ccti_limit; + + /* CA's max number of 64 entry units in the congestion control table */ + u8 cc_max_table_entries; }; /* Observers. Not to be taken lightly, possibly not to ship. */ @@ -1077,6 +1112,7 @@ extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; extern unsigned qib_wc_pat; +extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); int qib_enable_wc(struct qib_devdata *dd); @@ -1267,6 +1303,11 @@ int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, /* ppd->sdma_lock should be locked before calling this. */ int qib_sdma_make_progress(struct qib_pportdata *dd); +static inline int qib_sdma_empty(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_added == ppd->sdma_descq_removed; +} + /* must be called under qib_sdma_lock */ static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) { diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 9892456a434..1686fd4bda8 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -53,6 +53,9 @@ #include "qib.h" #include "qib_common.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + /* * Each client that opens the diag device must read then write * offset 0, to prevent lossage from random cat or od. diag_state @@ -598,8 +601,8 @@ static ssize_t qib_diagpkt_write(struct file *fp, } tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, " - "failing\n"); + qib_devinfo(dd->pcidev, + "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -693,7 +696,7 @@ int qib_register_observer(struct qib_devdata *dd, ret = -ENOMEM; olp = vmalloc(sizeof *olp); if (!olp) { - printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n"); + pr_err("vmalloc for observer failed\n"); goto bail; } if (olp) { diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 8895cfec501..e41e7f7fc76 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -764,8 +764,9 @@ int qib_reset_device(int unit) qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { - qib_devinfo(dd->pcidev, "Invalid unit number %u or " - "not initialized or not present\n", unit); + qib_devinfo(dd->pcidev, + "Invalid unit number %u or not initialized or not present\n", + unit); ret = -ENXIO; goto bail; } @@ -802,11 +803,13 @@ int qib_reset_device(int unit) else ret = -EAGAIN; if (ret) - qib_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); + qib_dev_err(dd, + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); else - qib_devinfo(dd->pcidev, "Reinitialized unit %u after " - "resetting\n", unit); + qib_devinfo(dd->pcidev, + "Reinitialized unit %u after resetting\n", + unit); bail: return ret; diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 92d9cfe98a6..4d5d71aaa2b 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) if (oguid > bguid[7]) { if (bguid[6] == 0xff) { if (bguid[5] == 0xff) { - qib_dev_err(dd, "Can't set %s GUID" - " from base, wraps to" - " OUI!\n", - qib_get_unit_name(t)); + qib_dev_err(dd, + "Can't set %s GUID from base, wraps to OUI!\n", + qib_get_unit_name(t)); dd->base_guid = 0; goto bail; } @@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) len = sizeof(struct qib_flash); buf = vmalloc(len); if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", + len); goto bail; } @@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - qib_devinfo(dd->pcidev, "Bad I2C flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); + qib_devinfo(dd->pcidev, + "Bad I2C flash checksum: 0x%x, not 0x%x\n", + csum, ifp->if_csum); goto done; } if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n", - *(unsigned long long *) ifp->if_guid); + qib_dev_err(dd, + "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); /* don't allow GUID if all 0 or all 1's */ goto done; } /* complain, but allow it */ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - qib_devinfo(dd->pcidev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); + qib_devinfo(dd->pcidev, + "Warning, GUID %llx is default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); bguid = ifp->if_guid; if (!bguid[0] && !bguid[1] && !bguid[2]) { @@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) memcpy(dd->serial, ifp->if_serial, sizeof ifp->if_serial); if (!strstr(ifp->if_comment, "Tested successfully")) - qib_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->serial, ifp->if_comment); + qib_dev_err(dd, + "Board SN %s did not pass functional test: %s\n", + dd->serial, ifp->if_comment); memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); /* @@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd) buf = vmalloc(len); ret = 1; if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for logging\n", + len); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index a7403248d83..faa44cb0807 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,6 +49,9 @@ #include "qib_common.h" #include "qib_user_sdma.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + static int qib_open(struct inode *, struct file *); static int qib_close(struct inode *, struct file *); static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); @@ -315,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, } if (cnt > tidcnt) { /* make sure it all fits in tid_pg_list */ - qib_devinfo(dd->pcidev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); cnt = tidcnt; } pagep = (struct page **) rcd->tid_pg_list; @@ -750,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " - "bytes failed: %d\n", what, rcd->ctxt, - pfn, len, ret); + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); bail: return ret; } @@ -771,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, */ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; if ((vma->vm_end - vma->vm_start) > sz) { - qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); ret = -EFAULT; } else { phys = dd->physaddr + ureg; @@ -802,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, * for it. */ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { - qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); ret = -EINVAL; goto bail; @@ -847,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, size = rcd->rcvegrbuf_size; total_size = rcd->rcvegrbuf_chunks * size; if ((vma->vm_end - vma->vm_start) > total_size) { - qib_devinfo(dd->pcidev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); ret = -EINVAL; @@ -856,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, } if (vma->vm_flags & VM_WRITE) { - qib_devinfo(dd->pcidev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); ret = -EPERM; goto bail; } @@ -1270,8 +1276,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, GFP_KERNEL); if (!rcd || !ptmp) { - qib_dev_err(dd, "Unable to allocate ctxtdata " - "memory, failing open\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); ret = -ENOMEM; goto bailerr; } @@ -1560,10 +1566,10 @@ done_chk_sdma: } else if (weight == 1 && test_bit(cpumask_first(tsk_cpus_allowed(current)), qib_cpulist)) - qib_devinfo(dd->pcidev, "%s PID %u affinity " - "set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); + qib_devinfo(dd->pcidev, + "%s PID %u affinity set to cpu %d; already allocated\n", + current->comm, current->pid, + cpumask_first(tsk_cpus_allowed(current))); } mutex_unlock(&qib_mutex); @@ -2185,8 +2191,7 @@ int qib_cdev_init(int minor, const char *name, cdev = cdev_alloc(); if (!cdev) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", + pr_err("Could not allocate cdev for minor %d, %s\n", minor, name); ret = -ENOMEM; goto done; @@ -2198,8 +2203,7 @@ int qib_cdev_init(int minor, const char *name, ret = cdev_add(cdev, dev, 1); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", + pr_err("Could not add cdev for minor %d, %s (err %d)\n", minor, name, -ret); goto err_cdev; } @@ -2209,8 +2213,7 @@ int qib_cdev_init(int minor, const char *name, goto done; ret = PTR_ERR(device); device = NULL; - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", + pr_err("Could not create device for minor %d, %s (err %d)\n", minor, name, -ret); err_cdev: cdev_del(cdev); @@ -2245,16 +2248,14 @@ int __init qib_dev_init(void) ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " - "chrdev region (err %d)\n", -ret); + pr_err("Could not allocate chrdev region (err %d)\n", -ret); goto done; } qib_class = class_create(THIS_MODULE, "ipath"); if (IS_ERR(qib_class)) { ret = PTR_ERR(qib_class); - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); + pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 05e0f17c5b4..cff8a6c3216 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -382,7 +383,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, &simple_dir_operations, dd); if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + pr_err("create_file(%s) failed: %d\n", unit, ret); goto bail; } @@ -390,21 +391,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counters) failed: %d\n", + pr_err("create_file(%s/counters) failed: %d\n", unit, ret); goto bail; } ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n", + pr_err("create_file(%s/counter_names) failed: %d\n", unit, ret); goto bail; } ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, "portcounter_names", ret); goto bail; } @@ -416,7 +417,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[i], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -426,7 +427,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &qsfp_ops[i - 1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -435,7 +436,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, &flash_ops, dd); if (ret) - printk(KERN_ERR "create_file(%s/flash) failed: %d\n", + pr_err("create_file(%s/flash) failed: %d\n", unit, ret); bail: return ret; @@ -486,7 +487,7 @@ static int remove_device_files(struct super_block *sb, if (IS_ERR(dir)) { ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); + pr_err("Lookup of %s failed\n", unit); goto bail; } @@ -532,7 +533,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) ret = simple_fill_super(sb, QIBFS_MAGIC, files); if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + pr_err("simple_fill_super failed: %d\n", ret); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 4d352b90750..a099ac171e2 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -753,8 +753,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) return; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); return; } qib_stats.sps_hwerrs++; @@ -779,13 +779,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, * or it's occurred within the last 5 seconds. */ if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { @@ -815,8 +816,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware" - " unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -868,8 +870,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, *msg = 0; /* recovered from all of them */ if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1017,9 +1020,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) qib_inc_eeprom_err(dd, log_idx, 1); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", - (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_6120_senderrbufs(ppd); @@ -1089,8 +1092,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1541,8 +1544,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); handle_6120_errors(dd, estat); } @@ -1715,16 +1719,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) } if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret; ret = request_irq(dd->cspec->irq, qib_6120intr, 0, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup interrupt " - "(irq=%d): %d\n", dd->cspec->irq, - ret); + qib_dev_err(dd, + "Couldn't setup interrupt (irq=%d): %d\n", + dd->cspec->irq, ret); } } @@ -1759,8 +1763,9 @@ static void pe_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware revision " - "%u.%u!\n", dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ qib_6120_init_hwerrors(dd); /* for Rev2 error interrupts; nop for rev 1 */ @@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2928,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->dd->unit, ppd->port); } else if (!strncmp(what, "off", 3)) { ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3186,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } return ret; @@ -3218,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -3551,8 +3558,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, goto bail; if (qib_pcie_params(dd, 8, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ /* clear diagctrl register, in case diags were running and crashed */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 86a0ba7ca0c..64d0ecb90cd 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1111,9 +1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) sdma_7220_errors(ppd, errs); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_7220_senderrbufs(ppd); @@ -1192,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1305,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -1329,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_inc_eeprom_err(dd, log_idx, 1); if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | RXE_PARITY)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) qib_sd7220_clr_ibpar(dd); @@ -1362,8 +1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -1409,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * For /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); else handle_7220_errors(dd, estat); } @@ -2023,17 +2027,18 @@ bail: static void qib_setup_7220_interrupt(struct qib_devdata *dd) { if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret = request_irq(dd->cspec->irq, qib_7220intr, dd->msi_lo ? 0 : IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup %s interrupt " - "(irq=%d): %d\n", dd->msi_lo ? - "MSI" : "INTx", dd->cspec->irq, ret); + qib_dev_err(dd, + "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->msi_lo ? "MSI" : "INTx", + dd->cspec->irq, ret); } } @@ -2072,9 +2077,9 @@ static void qib_7220_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware " - "revision %u.%u!\n", - dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -2146,8 +2151,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* hold IBC in reset, no sends, etc till later */ qib_write_kreg(dd, kr_control, 0ULL); @@ -2187,8 +2192,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2706,8 +2712,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); /* enable heart beat again */ val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3307,8 +3314,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) if (!dd->msi_lo) return 0; - qib_devinfo(dd->pcidev, "MSI interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); qib_enable_intx(dd->pcidev); /* @@ -3980,11 +3987,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } qib_register_observer(dd, &sendctrl_observer); @@ -4014,8 +4020,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -4613,8 +4619,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, break; } if (qib_pcie_params(dd, minwidth, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* save IRQ for possible later use */ dd->cspec->irq = pdev->irq; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index c881e744c09..0d7280af99b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -49,6 +50,10 @@ #include "qib_qsfp.h" #include "qib_mad.h" +#include "qib_verbs.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME " " fmt static void qib_setup_7322_setextled(struct qib_pportdata *, u32); static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); @@ -1575,8 +1580,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) qib_stats.sps_errints++; errs = qib_read_kreg64(dd, kr_errstatus); if (!errs) { - qib_devinfo(dd->pcidev, "device error interrupt, " - "but no error bits set!\n"); + qib_devinfo(dd->pcidev, + "device error interrupt, but no error bits set!\n"); goto done; } @@ -1622,8 +1627,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) if (errs & QIB_E_RESET) { int pidx; - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1760,9 +1765,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) ppd->dd->cspec->r1 ? QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u re-enabled QDR adaptation " - "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt); + pr_info( + "IB%u:%u re-enabled QDR adaptation ibclt %x\n", + ppd->dd->unit, ppd->port, ibclt); } } } @@ -1804,9 +1809,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) if (!*msg) snprintf(msg, sizeof ppd->cpspec->epmsgbuf, "no others"); - qib_dev_porterr(dd, ppd->port, "error interrupt with unknown" - " errors 0x%016Lx set (and %s)\n", - (errs & ~QIB_E_P_BITSEXTANT), msg); + qib_dev_porterr(dd, ppd->port, + "error interrupt with unknown errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); *msg = '\0'; } @@ -2024,8 +2029,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -2039,8 +2044,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, /* no EEPROM logging, yet */ if (hwerrs) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { @@ -2064,8 +2070,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcpy(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -2078,8 +2085,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -2667,8 +2675,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat) char msg[128]; kills = istat & ~QIB_I_BITSEXTANT; - qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:" - " %s\n", (unsigned long long) kills, msg); + qib_dev_err(dd, + "Clearing reserved interrupt(s) 0x%016llx: %s\n", + (unsigned long long) kills, msg); qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); } @@ -3101,16 +3110,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) /* Try to get INTx interrupt */ try_intx: if (!dd->pcidev->irq) { - qib_dev_err(dd, "irq is 0, BIOS error? " - "Interrupts won't work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); goto bail; } ret = request_irq(dd->pcidev->irq, qib_7322intr, IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) { - qib_dev_err(dd, "Couldn't setup INTx " - "interrupt (irq=%d): %d\n", - dd->pcidev->irq, ret); + qib_dev_err(dd, + "Couldn't setup INTx interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); goto bail; } dd->cspec->irq = dd->pcidev->irq; @@ -3185,8 +3194,9 @@ try_intx: * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. */ - qib_dev_err(dd, "Couldn't setup MSIx " - "interrupt (vec=%d, irq=%d): %d\n", msixnum, + qib_dev_err(dd, + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, dd->cspec->msix_entries[msixnum].msix.vector, ret); qib_7322_nomsix(dd); @@ -3305,8 +3315,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { - qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" - " by module parameter\n", dd->unit); + qib_devinfo(dd->pcidev, + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } @@ -3400,8 +3411,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (val == dd->revision) break; if (i == 5) { - qib_dev_err(dd, "Failed to initialize after reset, " - "unusable\n"); + qib_dev_err(dd, + "Failed to initialize after reset, unusable\n"); ret = 0; goto bail; } @@ -3432,8 +3443,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (qib_pcie_params(dd, dd->lbus_width, &dd->cspec->num_msix_entries, dd->cspec->msix_entries)) - qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); qib_setup_7322_interrupt(dd, 1); @@ -3474,8 +3485,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -4029,8 +4041,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) Loopback); /* enable heart beat again */ val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -4714,8 +4727,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, "Failed allocation for" - " portcounters\n"); + qib_dev_err(dd, + "Failed allocation for portcounters\n"); } } @@ -4865,8 +4878,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) if (!dd->cspec->num_msix_entries) return 0; /* already using INTx */ - qib_devinfo(dd->pcidev, "MSIx interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); qib_enable_intx(dd->pcidev); qib_setup_7322_interrupt(dd, 0); @@ -5151,15 +5164,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; if (!ibp->smi_ah) { - struct ib_ah_attr attr; struct ib_ah *ah; - memset(&attr, 0, sizeof attr); - attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE); - attr.port_num = ppd->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->smi_ah = to_iah(ah); @@ -5844,22 +5853,21 @@ static int setup_txselect(const char *str, struct kernel_param *kp) { struct qib_devdata *dd; unsigned long val; - char *n; + int ret; + if (strlen(str) >= MAX_ATTEN_LEN) { - printk(KERN_INFO QIB_DRV_NAME " txselect_values string " - "too long\n"); + pr_info("txselect_values string too long\n"); return -ENOSPC; } - val = simple_strtoul(str, &n, 0); - if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + ret = kstrtoul(str, 0, &val); + if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { - printk(KERN_INFO QIB_DRV_NAME - "txselect_values must start with a number < %d\n", + pr_info("txselect_values must start with a number < %d\n", TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); - return -EINVAL; + return ret ? ret : -EINVAL; } - strcpy(txselect_list, str); + strcpy(txselect_list, str); list_for_each_entry(dd, &qib_dev_list, list) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) set_no_qsfp_atten(dd, 1); @@ -5882,11 +5890,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } @@ -6098,8 +6105,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -6265,9 +6272,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd) */ if (!(dd->flags & QIB_HAS_QSFP)) { if (!IS_QMH(dd) && !IS_QME(dd)) - qib_devinfo(dd->pcidev, "IB%u:%u: " - "Unknown mezzanine card type\n", - dd->unit, ppd->port); + qib_devinfo(dd->pcidev, + "IB%u:%u: Unknown mezzanine card type\n", + dd->unit, ppd->port); cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; /* * Choose center value as default tx serdes setting @@ -6922,8 +6929,8 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->cspec->msix_entries[i].msix.entry = i; if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ dd->cspec->num_msix_entries = tabsize; @@ -7276,8 +7283,7 @@ static void find_best_ent(struct qib_pportdata *ppd, ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u use idx %u into txdds_mfg\n", + pr_info("IB%u:%u use idx %u into txdds_mfg\n", ppd->dd->unit, ppd->port, idx); *sdr_dds = &txdds_extra_mfg[idx]; *ddr_dds = &txdds_extra_mfg[idx]; @@ -7432,11 +7438,11 @@ static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); if (enable && !state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n", + pr_info("IB%u:%u Turning LOS on\n", ppd->dd->unit, ppd->port); data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } else if (!enable && state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n", + pr_info("IB%u:%u Turning LOS off\n", ppd->dd->unit, ppd->port); data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } @@ -7672,8 +7678,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) } } if (chan_done) { - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d calibration not done after .5 sec: 0x%x\n", + pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", IBSD(ppd->hw_pidx), chan_done); } else { for (chan = 0; chan < SERDES_CHANS; ++chan) { @@ -7681,9 +7686,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) (chan + (chan >> 1)), 25, 0, 0); if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d chan %d calibration " - "failed\n", IBSD(ppd->hw_pidx), chan); + pr_info("Serdes %d chan %d calibration failed\n", + IBSD(ppd->hw_pidx), chan); } } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index dc14e100a7f..4443adfcd9e 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -38,9 +38,14 @@ #include <linux/delay.h> #include <linux/idr.h> #include <linux/module.h> +#include <linux/printk.h> #include "qib.h" #include "qib_common.h" +#include "qib_mad.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt /* * min buffers we want to have per context, after driver @@ -71,6 +76,9 @@ unsigned qib_n_krcv_queues; module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); +unsigned qib_cc_table_size; +module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); /* * qib_wc_pat parameter: * 0 is WC via MTRR @@ -120,8 +128,8 @@ int qib_create_ctxts(struct qib_devdata *dd) */ dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); if (!dd->rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata array, " - "failing\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata array, failing\n"); ret = -ENOMEM; goto done; } @@ -137,8 +145,8 @@ int qib_create_ctxts(struct qib_devdata *dd) ppd = dd->pport + (i % dd->num_pports); rcd = qib_create_ctxtdata(ppd, i); if (!rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata" - " for Kernel ctxt, failing\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); ret = -ENOMEM; goto done; } @@ -199,6 +207,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { + int size; ppd->dd = dd; ppd->hw_pidx = hw_pidx; ppd->port = port; /* IB port number, not index */ @@ -210,6 +219,83 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, init_timer(&ppd->symerr_clear_timer); ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; ppd->symerr_clear_timer.data = (unsigned long)ppd; + + ppd->qib_wq = NULL; + + spin_lock_init(&ppd->cc_shadow_lock); + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) + goto bail; + + ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, + IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); + + ppd->cc_max_table_entries = + ppd->cc_supported_table_entries/IB_CCT_ENTRIES; + + size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) + * IB_CCT_ENTRIES; + ppd->ccti_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries) { + qib_dev_err(dd, + "failed to allocate congestion control table for port %d!\n", + port); + goto bail; + } + + size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); + ppd->congestion_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries) { + qib_dev_err(dd, + "failed to allocate congestion setting list for port %d!\n", + port); + goto bail_1; + } + + size = sizeof(struct cc_table_shadow); + ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow ccti list for port %d!\n", + port); + goto bail_2; + } + + size = sizeof(struct ib_cc_congestion_setting_attr); + ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow congestion setting list for port %d!\n", + port); + goto bail_3; + } + + return; + +bail_3: + kfree(ppd->ccti_entries_shadow); + ppd->ccti_entries_shadow = NULL; +bail_2: + kfree(ppd->congestion_entries); + ppd->congestion_entries = NULL; +bail_1: + kfree(ppd->ccti_entries); + ppd->ccti_entries = NULL; +bail: + /* User is intentionally disabling the congestion control agent */ + if (!qib_cc_table_size) + return; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { + qib_cc_table_size = 0; + qib_dev_err(dd, + "Congestion Control table size %d less than minimum %d for port %d\n", + qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); + } + + qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", + port); + return; } static int init_pioavailregs(struct qib_devdata *dd) @@ -221,8 +307,8 @@ static int init_pioavailregs(struct qib_devdata *dd) &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, GFP_KERNEL); if (!dd->pioavailregs_dma) { - qib_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); + qib_dev_err(dd, + "failed to allocate PIOavail reg area in memory\n"); ret = -ENOMEM; goto done; } @@ -277,15 +363,15 @@ static void init_shadow_tids(struct qib_devdata *dd) pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); if (!pages) { - qib_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow page * array, no expected sends!\n"); goto bail; } addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); if (!addrs) { - qib_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow dma handle array, no expected sends!\n"); goto bail_free; } @@ -309,13 +395,13 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { - qib_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - QIB_CHIP_SWVERSION, - (int)(dd->revision >> + qib_dev_err(dd, + "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & - QLOGIC_IB_R_SOFTWARE_MASK, - (unsigned long long) dd->revision); + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); ret = -ENOSYS; goto done; } @@ -419,8 +505,8 @@ static void verify_interrupt(unsigned long opaque) */ if (dd->int_counter == 0) { if (!dd->f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); + dev_err(&dd->pcidev->dev, + "No interrupts detected, not usable.\n"); else /* re-arm the timer to see if fallback works */ mod_timer(&dd->intrchk_timer, jiffies + HZ/2); } @@ -483,6 +569,41 @@ static void init_piobuf_state(struct qib_devdata *dd) } /** + * qib_create_workqueues - create per port workqueues + * @dd: the qlogic_ib device + */ +static int qib_create_workqueues(struct qib_devdata *dd) +{ + int pidx; + struct qib_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (!ppd->qib_wq) { + char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ + snprintf(wq_name, sizeof(wq_name), "qib%d_%d", + dd->unit, pidx); + ppd->qib_wq = + create_singlethread_workqueue(wq_name); + if (!ppd->qib_wq) + goto wq_error; + } + } + return 0; +wq_error: + pr_err("create_singlethread_workqueue failed for port %d\n", + pidx + 1); + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + } + return -ENOMEM; +} + +/** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device * @reinit: reinitializing, so don't allocate new memory @@ -547,8 +668,8 @@ int qib_init(struct qib_devdata *dd, int reinit) if (!lastfail) lastfail = qib_setup_eagerbufs(rcd); if (lastfail) { - qib_dev_err(dd, "failed to allocate kernel ctxt's " - "rcvhdrq and/or egr bufs\n"); + qib_dev_err(dd, + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); continue; } } @@ -764,6 +885,11 @@ static void qib_shutdown_device(struct qib_devdata *dd) * We can't count on interrupts since we are stopping. */ dd->f_quiet_serdes(ppd); + + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } } qib_update_eeprom_log(dd); @@ -893,8 +1019,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd) /* 1 GiB/sec, slightly over IB SDR line rate */ if (lcnt < (emsecs * 1024U)) qib_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", + "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", lcnt / (u32) emsecs); preempt_enable(); @@ -967,8 +1092,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (qib_cpulist) qib_cpulist_count = count; else - qib_early_err(&pdev->dev, "Could not alloc cpulist " - "info, cpu affinity might be wrong\n"); + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); } bail: @@ -1057,21 +1182,20 @@ static int __init qlogic_ib_init(void) */ idr_init(&qib_unit_table); if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n"); + pr_err("idr_pre_get() failed\n"); ret = -ENOMEM; goto bail_cq_wq; } ret = pci_register_driver(&qib_driver); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Unable to register driver: error %d\n", -ret); + pr_err("Unable to register driver: error %d\n", -ret); goto bail_unit; } /* not fatal if it doesn't work */ if (qib_init_qibfs()) - printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n"); + pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ bail_unit: @@ -1095,9 +1219,9 @@ static void __exit qlogic_ib_cleanup(void) ret = qib_exit_qibfs(); if (ret) - printk(KERN_ERR QIB_DRV_NAME ": " - "Unable to cleanup counter filesystem: " - "error %d\n", -ret); + pr_err( + "Unable to cleanup counter filesystem: error %d\n", + -ret); pci_unregister_driver(&qib_driver); @@ -1121,10 +1245,24 @@ static void cleanup_device_data(struct qib_devdata *dd) unsigned long flags; /* users can't do anything more with chip */ - for (pidx = 0; pidx < dd->num_pports; ++pidx) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { if (dd->pport[pidx].statusp) *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + spin_lock(&dd->pport[pidx].cc_shadow_lock); + + kfree(dd->pport[pidx].congestion_entries); + dd->pport[pidx].congestion_entries = NULL; + kfree(dd->pport[pidx].ccti_entries); + dd->pport[pidx].ccti_entries = NULL; + kfree(dd->pport[pidx].ccti_entries_shadow); + dd->pport[pidx].ccti_entries_shadow = NULL; + kfree(dd->pport[pidx].congestion_entries_shadow); + dd->pport[pidx].congestion_entries_shadow = NULL; + + spin_unlock(&dd->pport[pidx].cc_shadow_lock); + } + if (!qib_wc_pat) qib_disable_wc(dd); @@ -1223,9 +1361,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, #ifdef CONFIG_PCI_MSI dd = qib_init_iba6120_funcs(pdev, ent); #else - qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " - "work if CONFIG_PCI_MSI is not enabled\n", - ent->device); + qib_early_err(&pdev->dev, + "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + ent->device); dd = ERR_PTR(-ENODEV); #endif break; @@ -1239,8 +1377,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, break; default: - qib_early_err(&pdev->dev, "Failing on unknown QLogic " - "deviceid 0x%x\n", ent->device); + qib_early_err(&pdev->dev, + "Failing on unknown QLogic deviceid 0x%x\n", + ent->device); ret = -ENODEV; } @@ -1249,6 +1388,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (ret) goto bail; /* error already printed */ + ret = qib_create_workqueues(dd); + if (ret) + goto bail; + /* do the generic initialization */ initfail = qib_init(dd, 0); @@ -1293,9 +1436,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (!qib_wc_pat) { ret = qib_enable_wc(dd); if (ret) { - qib_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); ret = 0; } } @@ -1361,9 +1504,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { - qib_dev_err(dd, "attempt to allocate %d bytes " - "for ctxt %u rcvhdrq failed\n", - amt, rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); goto bail; } @@ -1392,8 +1535,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) return 0; bail_free: - qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u " - "rcvhdrqtailaddr failed\n", rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; bail_free_hdrq: diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 6ae57d23004..f4918f2165e 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -224,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd) * We print the message and disable interrupts, in hope of * having a better chance of debugging the problem. */ - qib_dev_err(dd, "Read of chip interrupt status failed" - " disabling interrupts\n"); + qib_dev_err(dd, + "Read of chip interrupt status failed disabling interrupts\n"); if (allbits++) { /* disable interrupt delivery, something is very wrong */ if (allbits == 2) dd->f_set_intr_state(dd, 0); if (allbits == 3) { - qib_dev_err(dd, "2nd bad interrupt status, " - "unregistering interrupts\n"); + qib_dev_err(dd, + "2nd bad interrupt status, unregistering interrupts\n"); dd->flags |= QIB_BADINTR; dd->flags &= ~QIB_INITTED; dd->f_free_irq(dd); diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 8fd19a47df0..e9486c74c22 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -35,21 +35,41 @@ /** * qib_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. * - * Returns 1 if successful, otherwise returns 0. */ -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; - int ret; + int ret = 0; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct qib_mregion *tmr; + + tmr = rcu_dereference(dev->dma_mr); + if (!tmr) { + qib_get_mr(mr); + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } + goto success; + } + /* Find the next available LKEY */ r = rkt->next; n = r; @@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) if (rkt->table[r] == NULL) break; r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; + if (r == n) goto bail; - } } rkt->next = (r + 1) & (rkt->max - 1); /* @@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) mr->lkey |= 1 << 8; rkt->gen++; } - rkt->table[r] = mr; + qib_get_mr(mr); + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: +out: return ret; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; } /** * qib_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free + * @mr: mr to free from tables */ -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) +void qib_free_lkey(struct qib_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; - int ret; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (lkey == 0) { - if (dev->dma_mr && dev->dma_mr == mr) { - ret = atomic_read(&dev->dma_mr->refcount); - if (!ret) - dev->dma_mr = NULL; - } else - ret = 0; - } else { + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) + rcu_assign_pointer(dev->dma_mr, NULL); + else { r = lkey >> (32 - ib_qib_lkey_table_size); - ret = atomic_read(&dev->lk_table.table[r]->refcount); - if (!ret) - dev->lk_table.table[r] = NULL; + rcu_assign_pointer(rkt->table[r], NULL); } - spin_unlock_irqrestore(&dev->lk_table.lock, flags); - - if (ret) - ret = -EBUSY; - return ret; + qib_put_mr(mr); + mr->lkey_published = 0; +out: + spin_unlock_irqrestore(&rkt->lock, flags); } /** * qib_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against + * @pd: protection domain * @isge: outgoing internal SGE * @sge: SGE to check * @acc: access flags * * Return 1 if valid and successful, otherwise returns 0. * + * increments the reference count upon success + * * Check the IB SGE for validity and initialize our internal version * of it. */ @@ -136,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use LKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (sge->lkey == 0) { struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); - isge->mr = dev->dma_mr; + isge->mr = mr; isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; @@ -161,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->n = 0; goto ok; } - mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - mr->pd != &pd->ibpd)) + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) + if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length || + (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -208,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } /** * qib_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state + * @qp: qp for validation + * @sge: SGE state * @len: length of data * @vaddr: virtual address to place data * @rkey: rkey to check * @acc: access flags * * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success */ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) @@ -230,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use RKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (rkey == 0) { struct qib_pd *pd = to_ipd(qp->ibqp.pd); struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); - sge->mr = dev->dma_mr; + sge->mr = mr; sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; @@ -257,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, goto ok; } - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + mr = rcu_dereference( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -302,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } @@ -325,7 +349,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) if (pd->user || rkey == 0) goto bail; - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; + mr = rcu_dereference_protected( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 43390217a02..19f1e6c45fb 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static int reply_failure(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY; +} + static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (!ibp->sm_ah) { if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - struct ib_ah_attr attr; - memset(&attr, 0, sizeof attr); - attr.dlid = ibp->sm_lid; - attr.port_num = ppd_from_ibp(ibp)->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, ibp->sm_lid); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->sm_ah = to_iah(ah); @@ -2051,6 +2059,298 @@ bail: return ret; } +static int cc_get_classportinfo(struct ib_cc_mad *ccp, + struct ib_device *ibdev) +{ + struct ib_cc_classportinfo_attr *p = + (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->base_version = 1; + p->class_version = 1; + p->cap_mask = 0; + + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_info(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_info_attr *p = + (struct ib_cc_info_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->congestion_info = 0; + p->control_table_cap = ppd->cc_max_table_entries; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + int i; + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_cc_congestion_entry_shadow *entries; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + entries = ppd->congestion_entries_shadow->entries; + p->port_control = cpu_to_be16( + ppd->congestion_entries_shadow->port_control); + p->control_map = cpu_to_be16( + ppd->congestion_entries_shadow->control_map); + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + p->entries[i].ccti_increase = entries[i].ccti_increase; + p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer); + p->entries[i].trigger_threshold = entries[i].trigger_threshold; + p->entries[i].ccti_min = entries[i].ccti_min; + } + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 max_cct_block; + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + max_cct_block = + (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES; + max_cct_block = max_cct_block ? max_cct_block - 1 : 0; + + if (cct_block_index > max_cct_block) { + spin_unlock(&ppd->cc_shadow_lock); + goto bail; + } + + ccp->attr_mod = cpu_to_be32(cct_block_index); + + cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1); + + cct_entry--; + + p->ccti_limit = cpu_to_be16(cct_entry); + + entries = &ppd->ccti_entries_shadow-> + entries[IB_CCT_ENTRIES * cct_block_index]; + cct_entry %= IB_CCT_ENTRIES; + + for (i = 0; i <= cct_entry; i++) + p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int cc_set_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int i; + + ppd->cc_sl_control_map = be16_to_cpu(p->control_map); + + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + ppd->congestion_entries[i].ccti_increase = + p->entries[i].ccti_increase; + + ppd->congestion_entries[i].ccti_timer = + be16_to_cpu(p->entries[i].ccti_timer); + + ppd->congestion_entries[i].trigger_threshold = + p->entries[i].trigger_threshold; + + ppd->congestion_entries[i].ccti_min = + p->entries[i].ccti_min; + } + + return reply((struct ib_smp *) ccp); +} + +static int cc_set_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + /* If this packet is the first in the sequence then + * zero the total table entry count. + */ + if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES) + ppd->total_cct_entry = 0; + + cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES; + + /* ccti_limit is 0 to 63 */ + ppd->total_cct_entry += (cct_entry + 1); + + if (ppd->total_cct_entry > ppd->cc_supported_table_entries) + goto bail; + + ppd->ccti_limit = be16_to_cpu(p->ccti_limit); + + entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index); + + for (i = 0; i <= cct_entry; i++) + entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry); + + spin_lock(&ppd->cc_shadow_lock); + + ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1; + memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries, + (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry))); + + ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED; + ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map; + memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries, + IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry)); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int check_cc_key(struct qib_ibport *ibp, + struct ib_cc_mad *ccp, int mad_flags) +{ + return 0; +} + +static int process_cc(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + int ret; + + *out_mad = *in_mad; + + if (ccp->class_version != 2) { + ccp->status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *)ccp); + goto bail; + } + + ret = check_cc_key(ibp, ccp, mad_flags); + if (ret) + goto bail; + + switch (ccp->method) { + case IB_MGMT_METHOD_GET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CLASSPORTINFO: + ret = cc_get_classportinfo(ccp, ibdev); + goto bail; + + case IB_CC_ATTR_CONGESTION_INFO: + ret = cc_get_congestion_info(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_get_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_get_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_set_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_set_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_TRAP: + default: + ccp->status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) ccp); + } + +bail: + return ret; +} + /** * qib_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on @@ -2075,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: @@ -2086,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = process_perf(ibdev, port, in_mad, out_mad); goto bail; + case IB_MGMT_CLASS_CONG_MGMT: + if (!ppd->congestion_entries_shadow || + !qib_cc_table_size) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + default: ret = IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index ecc416cdbaa..57bd3fa016b 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -31,6 +31,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef _QIB_MAD_H +#define _QIB_MAD_H #include <rdma/ib_pma.h> @@ -223,6 +225,198 @@ struct ib_pma_portcounters_cong { #define IB_PMA_SEL_CONG_ROUTING 0x08 /* + * Congestion control class attributes + */ +#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001) +#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011) +#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012) +#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013) +#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014) +#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015) +#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016) +#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017) +#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018) + +/* generalizations for threshold values */ +#define IB_CC_THRESHOLD_NONE 0x0 +#define IB_CC_THRESHOLD_MIN 0x1 +#define IB_CC_THRESHOLD_MAX 0xf + +/* CCA MAD header constants */ +#define IB_CC_MAD_LOGDATA_LEN 32 +#define IB_CC_MAD_MGMTDATA_LEN 192 + +struct ib_cc_mad { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 cckey; + + /* For CongestionLog attribute only */ + u8 log_data[IB_CC_MAD_LOGDATA_LEN]; + + u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN]; +} __packed; + +/* + * Congestion Control class portinfo capability mask bits + */ +#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0) +#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1) +#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2) +#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8) + +struct ib_cc_classportinfo_attr { + u8 base_version; + u8 class_version; + __be16 cap_mask; + u8 reserved[3]; + u8 resp_time_value; /* only lower 5 bits */ + union ib_gid redirect_gid; + __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; /* only lower 24 bits */ + __be32 redirect_qkey; + union ib_gid trap_gid; + __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hl_qp; /* 8, 24 bits respectively */ + __be32 trap_qkey; +} __packed; + +/* Congestion control traps */ +#define IB_CC_TRAP_KEY_VIOLATION 0x0000 + +struct ib_cc_trap_key_violation_attr { + __be16 source_lid; + u8 method; + u8 reserved1; + __be16 attrib_id; + __be32 attrib_mod; + __be32 qp; + __be64 cckey; + u8 sgid[16]; + u8 padding[24]; +} __packed; + +/* Congestion info flags */ +#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1 +#define IB_CC_TABLE_CAP_DEFAULT 31 + +struct ib_cc_info_attr { + __be16 congestion_info; + u8 control_table_cap; /* Multiple of 64 entry unit CCTs */ +} __packed; + +struct ib_cc_key_info_attr { + __be64 cckey; + u8 protect; + __be16 lease_period; + __be16 violations; +} __packed; + +#define IB_CC_CL_CA_LOGEVENTS_LEN 208 + +struct ib_cc_log_attr { + u8 log_type; + u8 congestion_flags; + __be16 threshold_event_counter; + __be16 threshold_congestion_event_map; + __be16 current_time_stamp; + u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN]; +} __packed; + +#define IB_CC_CLEC_SERVICETYPE_RC 0x0 +#define IB_CC_CLEC_SERVICETYPE_UC 0x1 +#define IB_CC_CLEC_SERVICETYPE_RD 0x2 +#define IB_CC_CLEC_SERVICETYPE_UD 0x3 + +struct ib_cc_log_event { + u8 local_qp_cn_entry; + u8 remote_qp_number_cn_entry[3]; + u8 sl_cn_entry:4; + u8 service_type_cn_entry:4; + __be32 remote_lid_cn_entry; + __be32 timestamp_cn_entry; +} __packed; + +/* Sixteen congestion entries */ +#define IB_CC_CCS_ENTRIES 16 + +/* Port control flags */ +#define IB_CC_CCS_PC_SL_BASED 0x01 + +struct ib_cc_congestion_entry { + u8 ccti_increase; + __be16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_entry_shadow { + u8 ccti_increase; + u16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_setting_attr { + __be16 port_control; + __be16 control_map; + struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES]; +} __packed; + +struct ib_cc_congestion_setting_attr_shadow { + u16 port_control; + u16 control_map; + struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES]; +} __packed; + +#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1 +#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1 + +/* 64 Congestion Control table entries in a single MAD */ +#define IB_CCT_ENTRIES 64 +#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2) + +struct ib_cc_table_entry { + __be16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_entry_shadow { + u16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_attr { + __be16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +struct ib_cc_table_attr_shadow { + u16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +#define CC_TABLE_SHADOW_MAX \ + (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES) + +struct cc_table_shadow { + u16 ccti_last_entry; + struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; +} __packed; + +#endif /* _QIB_MAD_H */ +/* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields * which specify the N'th counter's capabilities. See ch. 16.1.3.2. * We support 5 counters which only count the mandatory quantities. diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 08944e2ee33..e6687ded821 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) return container_of(ibfmr, struct qib_fmr, ibfmr); } +static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + int rval = 0; + + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); + if (!mr->map[i]) + goto bail; + } + mr->mapsz = m; + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; +out: + return rval; +bail: + while (i) + kfree(mr->map[--i]); + rval = -ENOMEM; + goto out; +} + +static void deinit_qib_mregion(struct qib_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + + /** * qib_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) */ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) { - struct qib_ibdev *dev = to_idev(pd->device); - struct qib_mr *mr; + struct qib_mr *mr = NULL; struct ib_mr *ret; - unsigned long flags; + int rval; if (to_ipd(pd)->user) { ret = ERR_PTR(-EPERM); @@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) goto bail; } - mr->mr.access_flags = acc; - atomic_set(&mr->mr.refcount, 0); + rval = init_qib_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } + - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (!dev->dma_mr) - dev->dma_mr = &mr->mr; - spin_unlock_irqrestore(&dev->lk_table.lock, flags); + rval = qib_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + mr->mr.access_flags = acc; ret = &mr->ibmr; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - return ret; + kfree(mr); + goto done; } -static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) +static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) { struct qib_mr *mr; - int m, i = 0; + int rval = -ENOMEM; + int m; /* Allocate struct plus pointers to first level page tables. */ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - mr->mr.page_shift = 0; - mr->mr.max_segs = count; + goto bail; + rval = init_qib_mregion(&mr->mr, pd, count); + if (rval) + goto bail; /* * ib_reg_phys_mr() will initialize mr->ibmr except for * lkey and rkey. */ - if (!qib_alloc_lkey(lk_table, &mr->mr)) - goto bail; + rval = qib_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; mr->ibmr.lkey = mr->mr.lkey; mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; - atomic_set(&mr->mr.refcount, 0); - goto done; - +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - while (i) - kfree(mr->mr.map[--i]); kfree(mr); - mr = NULL; - -done: - return mr; + mr = ERR_PTR(rval); + goto done; } /** @@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, int n, m, i; struct ib_mr *ret; - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(num_phys_buf, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; goto bail; } - mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; mr->mr.access_flags = acc; - mr->umem = NULL; m = 0; n = 0; @@ -186,7 +221,6 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) * @mr_access_flags: access flags for this memory region * @udata: unused by the QLogic_IB driver * @@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, &umem->chunk_list, list) n += chunk->nents; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; ib_umem_release(umem); goto bail; } - mr->mr.pd = pd; mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; @@ -271,21 +304,25 @@ bail: int qib_dereg_mr(struct ib_mr *ibmr) { struct qib_mr *mr = to_imr(ibmr); - struct qib_ibdev *dev = to_idev(ibmr->device); - int ret; - int i; - - ret = qib_free_lkey(dev, &mr->mr); - if (ret) - return ret; - - i = mr->mr.mapsz; - while (i) - kfree(mr->mr.map[--i]); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&mr->mr); + + qib_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&mr->mr); if (mr->umem) ib_umem_release(mr->umem); kfree(mr); - return 0; +out: + return ret; } /* @@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct qib_mr *mr; - mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); - if (mr == NULL) - return ERR_PTR(-ENOMEM); - - mr->mr.pd = pd; - mr->mr.user_base = 0; - mr->mr.iova = 0; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = 0; - mr->umem = NULL; + mr = alloc_mr(max_page_list_len, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; return &mr->ibmr; } @@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) if (size > PAGE_SIZE) return ERR_PTR(-EINVAL); - pl = kmalloc(sizeof *pl, GFP_KERNEL); + pl = kzalloc(sizeof *pl, GFP_KERNEL); if (!pl) return ERR_PTR(-ENOMEM); - pl->page_list = kmalloc(size, GFP_KERNEL); + pl->page_list = kzalloc(size, GFP_KERNEL); if (!pl->page_list) goto err_free; @@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct qib_fmr *fmr; - int m, i = 0; + int m; struct ib_fmr *ret; + int rval = -ENOMEM; /* Allocate struct plus pointers to first level page tables. */ m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); if (!fmr) goto bail; - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; + rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; /* * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & * rkey. */ - if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; + rval = qib_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; fmr->ibfmr.rkey = fmr->mr.lkey; fmr->ibfmr.lkey = fmr->mr.lkey; /* * Resources are allocated but no valid mapping (RKEY can't be * used). */ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; fmr->mr.access_flags = mr_access_flags; fmr->mr.max_segs = fmr_attr->max_pages; fmr->mr.page_shift = fmr_attr->page_shift; - atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; - goto done; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&fmr->mr); bail: - while (i) - kfree(fmr->mr.map[--i]); kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; + ret = ERR_PTR(rval); + goto done; } /** @@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, u32 ps; int ret; - if (atomic_read(&fmr->mr.refcount)) + i = atomic_read(&fmr->mr.refcount); + if (i > 2) return -EBUSY; if (list_len > fmr->mr.max_segs) { @@ -490,16 +510,27 @@ int qib_unmap_fmr(struct list_head *fmr_list) int qib_dealloc_fmr(struct ib_fmr *ibfmr) { struct qib_fmr *fmr = to_ifmr(ibfmr); - int ret; - int i; + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&fmr->mr); + qib_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} - ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); - if (ret) - return ret; +void mr_rcu_callback(struct rcu_head *list) +{ + struct qib_mregion *mr = container_of(list, struct qib_mregion, list); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; + complete(&mr->comp); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 790646ef510..062c301ebf5 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -224,8 +224,9 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, } do_intx: if (ret) { - qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " - "falling back to INTx\n", tabsize, ret); + qib_dev_err(dd, + "pci_enable_msix %d vectors failed: %d, falling back to INTx\n", + tabsize, ret); tabsize = 0; } for (i = 0; i < tabsize; i++) @@ -251,8 +252,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) ret = pci_enable_msi(pdev); if (ret) - qib_dev_err(dd, "pci_enable_msi failed: %d, " - "interrupts may not work\n", ret); + qib_dev_err(dd, + "pci_enable_msi failed: %d, interrupts may not work\n", + ret); /* continue even if it fails, we may still be OK... */ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, @@ -358,8 +360,8 @@ int qib_reinit_intr(struct qib_devdata *dd) pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); if (!pos) { - qib_dev_err(dd, "Can't find MSI capability, " - "can't restore MSI settings\n"); + qib_dev_err(dd, + "Can't find MSI capability, can't restore MSI settings\n"); ret = 0; /* nothing special for MSIx, just MSI */ goto bail; @@ -471,8 +473,8 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); r = pci_enable_device(dd->pcidev); if (r) - qib_dev_err(dd, "pci_enable_device failed after " - "reset: %d\n", r); + qib_dev_err(dd, + "pci_enable_device failed after reset: %d\n", r); } /* code to adjust PCIe capabilities. */ @@ -717,15 +719,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; } - qib_devinfo(pdev, "QIB mmio_enabled function called, " - "read wordscntr %Lx, returning %d\n", words, ret); + qib_devinfo(pdev, + "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", + words, ret); return ret; } static pci_ers_result_t qib_pci_slot_reset(struct pci_dev *pdev) { - qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); return PCI_ERS_RESULT_CAN_RECOVER; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 1ce56b51ab1..4850d03870c 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -250,23 +250,33 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) spin_lock_irqsave(&dev->qpt_lock, flags); - if (ibp->qp0 == qp) { + if (rcu_dereference_protected(ibp->qp0, + lockdep_is_held(&dev->qpt_lock)) == qp) { atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp0, NULL); - } else if (ibp->qp1 == qp) { + } else if (rcu_dereference_protected(ibp->qp1, + lockdep_is_held(&dev->qpt_lock)) == qp) { atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp1, NULL); } else { - struct qib_qp *q, **qpp; + struct qib_qp *q; + struct qib_qp __rcu **qpp; qpp = &dev->qp_table[n]; - for (; (q = *qpp) != NULL; qpp = &q->next) + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); + for (; q; qpp = &q->next) { if (q == qp) { atomic_dec(&qp->refcount); - rcu_assign_pointer(*qpp, qp->next); - qp->next = NULL; + *qpp = qp->next; + rcu_assign_pointer(qp->next, NULL); + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); break; } + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); + } } spin_unlock_irqrestore(&dev->qpt_lock, flags); @@ -302,10 +312,12 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { - qp = dev->qp_table[n]; + qp = rcu_dereference_protected(dev->qp_table[n], + lockdep_is_held(&dev->qpt_lock)); rcu_assign_pointer(dev->qp_table[n], NULL); - for (; qp; qp = qp->next) + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); @@ -337,7 +349,8 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) unsigned n = qpn_hash(dev, qpn); rcu_read_lock(); - for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) + for (qp = rcu_dereference(dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) break; } @@ -406,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) unsigned n; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (clr_sends) { while (qp->s_last != qp->s_head) { @@ -427,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -437,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -450,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -495,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_tx) { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index b641416148e..3ab341320ea 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -2024,11 +2024,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) break; @@ -2116,7 +2112,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2188,7 +2184,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2210,7 +2206,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index c0ee7e095d8..357b6cfcd46 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -110,7 +110,7 @@ bad_lkey: while (j) { struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -501,7 +501,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -525,7 +525,7 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -542,11 +542,7 @@ again: sqp->s_len -= len; } if (release) - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -782,7 +778,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index ac065dd6b69..a322d5171a2 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -342,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); if (ret < 0) - qib_dev_err(dd, "Failed checking TRIMDONE, chn %d" - " (%s)\n", chn, where); + qib_dev_err(dd, + "Failed checking TRIMDONE, chn %d (%s)\n", + chn, where); if (!(ret & 0x10)) { int probe; baduns |= (1 << chn); - qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." - " (%s)\n", chn, ret, where); + qib_dev_err(dd, + "TRIMDONE cleared on chn %d (%02X). (%s)\n", + chn, ret, where); probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_PGUDP(0), 0, 0); qib_dev_err(dd, "probe is %d (%02X)\n", @@ -375,8 +377,8 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - qib_dev_err(dd, "Failed re-setting " - "TRIMDONE, chn %d (%s)\n", + qib_dev_err(dd, + "Failed re-setting TRIMDONE, chn %d (%s)\n", chn, where); } } @@ -1144,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "pre-read failed: elt %d," - " addr 0x%X, chnl %d\n", - (sloc & 0xF), - (sloc >> 9) & 0x3f, chnl); + qib_dev_err(dd, + "pre-read failed: elt %d, addr 0x%X, chnl %d\n", + (sloc & 0xF), + (sloc >> 9) & 0x3f, chnl); return ret; } val = (ret & ~mask) | (val & mask); @@ -1157,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Global WR failed: elt %d," - " addr 0x%X, val %02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, val); + qib_dev_err(dd, + "Global WR failed: elt %d, addr 0x%X, val %02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, val); } return ret; } @@ -1173,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X," - " mask 0x%02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, - val & 0xFF, mask & 0xFF); + qib_dev_err(dd, + "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, + val & 0xFF, mask & 0xFF); break; } } diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 12a9604310d..3fc51443121 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -276,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) GFP_KERNEL); if (!ppd->sdma_descq) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA descriptor FIFO memory\n"); goto bail; } @@ -285,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); if (!ppd->sdma_head_dma) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA " - "head memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA head memory\n"); goto cleanup_descq; } ppd->sdma_head_dma[0] = 0; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index dd9cd49d097..034cc821de5 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,41 +34,7 @@ #include <linux/ctype.h> #include "qib.h" - -/** - * qib_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * Returns the number of bytes consumed, or negative value on error. - */ -static int qib_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} +#include "qib_mad.h" /* start of per-port functions */ /* @@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret; + } /* * Set the "intentional" heartbeat enable per either of @@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, * because entering loopback mode overrides it and automatically * disables heartbeat. */ - if (ret >= 0) - ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); - if (ret < 0) - qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); return ret < 0 ? ret : count; } @@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); - if (ret > 0) - qib_set_led_override(ppd, val); - else + ret = kstrtou16(buf, 0, &val); + if (ret) { qib_dev_err(dd, "attempt to set invalid LED override\n"); - return ret < 0 ? ret : count; + return ret; + } + + qib_set_led_override(ppd, val); + return count; } static ssize_t show_status(struct qib_pportdata *ppd, char *buf) @@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = { NULL }; +/* + * Start of per-port congestion control structures and support code + */ + +/* + * Congestion control table size followed by table entries + */ +static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->ccti_entries_shadow) + return -EINVAL; + + ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow) + + sizeof(__be16); + + if (pos > ret) + return -EINVAL; + + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->ccti_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static struct kobj_type qib_port_cc_ktype = { + .release = qib_port_release, +}; + +static struct bin_attribute cc_table_bin_attr = { + .attr = {.name = "cc_table_bin", .mode = 0444}, + .read = read_cc_table_bin, + .size = PAGE_SIZE, +}; + +/* + * Congestion settings: port control, control map and an array of 16 + * entries for the congestion entries - increase, timer, event log + * trigger threshold and the minimum injection rate delay. + */ +static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return -EINVAL; + + ret = sizeof(struct ib_cc_congestion_setting_attr_shadow); + + if (pos > ret) + return -EINVAL; + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->congestion_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static struct bin_attribute cc_setting_bin_attr = { + .attr = {.name = "cc_settings_bin", .mode = 0444}, + .read = read_cc_setting_bin, + .size = PAGE_SIZE, +}; + + static ssize_t qib_portattr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj, return pattr->store(ppd, buf, len); } -static void qib_port_release(struct kobject *kobj) -{ - /* nothing to do since memory is freed by qib_free_devdata() */ -} static const struct sysfs_ops qib_port_ops = { .show = qib_portattr_show, @@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - char *endp; - long val = simple_strtol(buf, &endp, 0); - - if (val < 0 || endp == buf) - return -EINVAL; + u32 val; + int ret; + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; *(u32 *)((char *) qibp + dattr->counter) = val; return size; } @@ -649,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, int ret; if (!port_num || port_num > dd->num_pports) { - qib_dev_err(dd, "Skipping infiniband class with " - "invalid port %u\n", port_num); + qib_dev_err(dd, + "Skipping infiniband class with invalid port %u\n", + port_num); ret = -ENODEV; goto bail; } @@ -659,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, "linkcontrol"); if (ret) { - qib_dev_err(dd, "Skipping linkcontrol sysfs info, " - "(err %d) port %u\n", ret, port_num); + qib_dev_err(dd, + "Skipping linkcontrol sysfs info, (err %d) port %u\n", + ret, port_num); goto bail; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -668,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, "sl2vl"); if (ret) { - qib_dev_err(dd, "Skipping sl2vl sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_sl; + qib_dev_err(dd, + "Skipping sl2vl sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_link; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, "diag_counters"); if (ret) { - qib_dev_err(dd, "Skipping diag_counters sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_diagc; + qib_dev_err(dd, + "Skipping diag_counters sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_sl; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return 0; + + ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype, + kobj, "CCMgtA"); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_diagc; + } + + kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc; + } + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control table sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc_entry_bin; + } + + qib_devinfo(dd->pcidev, + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); + return 0; +bail_cc_entry_bin: + sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); +bail_cc: + kobject_put(&ppd->pport_cc_kobj); bail_diagc: - kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->diagc_kobj); bail_sl: + kobject_put(&ppd->sl2vl_kobj); +bail_link: kobject_put(&ppd->pport_kobj); bail: return ret; @@ -720,7 +824,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; - kobject_put(&ppd->pport_kobj); + if (qib_cc_table_size && + ppd->congestion_entries_shadow) { + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + kobject_put(&ppd->pport_cc_kobj); + } kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->pport_kobj); } } diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index ddde72e11ed..647f7beb1b0 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, goto failed_write; ret = qib_twsi_wr(dd, addr, 0); if (ret) { - qib_dev_err(dd, "Failed to write interface" - " write addr %02X\n", addr); + qib_dev_err(dd, + "Failed to write interface write addr %02X\n", + addr); goto failed_write; } } diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index ce7387ff5d9..aa3a8035bb6 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -281,11 +281,7 @@ inv: set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -403,14 +399,9 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto rewind; wc.opcode = IB_WC_RECV; -last_imm: qib_copy_sge(&qp->r_sge, data, tlen, 0); - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); +last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; @@ -493,13 +484,7 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr-> - refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); if (ret < 0) @@ -509,6 +494,8 @@ rdma_last_imm: } wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -524,11 +511,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); break; default: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index a468bf2d446..d6c7fe7f88d 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -194,11 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } length -= len; } - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -556,11 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7b6c3bffa9d..fc9b205c241 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -333,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) * @qp: the QP to post on * @wr: the work request to send */ -static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr) +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, + int *scheduled) { struct qib_swqe *wqe; u32 next; @@ -435,11 +436,17 @@ bail_inval_free: while (j) { struct qib_sge *sge = &wqe->sg_list[--j]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } bail_inval: ret = -EINVAL; bail: + if (!ret && !wr->next && + !qib_sdma_empty( + dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { + qib_schedule_send(qp); + *scheduled = 1; + } spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -457,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct qib_qp *qp = to_iqp(ibqp); int err = 0; + int scheduled = 0; for (; wr; wr = wr->next) { - err = qib_post_one_send(qp, wr); + err = qib_post_one_send(qp, wr, &scheduled); if (err) { *bad_wr = wr; goto bail; @@ -467,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } /* Try to do the send work in the caller's context. */ - qib_do_send(&qp->s_work); + if (!scheduled) + qib_do_send(&qp->s_work); bail: return err; @@ -978,7 +987,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); if (tx->mr) { - atomic_dec(&tx->mr->refcount); + qib_put_mr(tx->mr); tx->mr = NULL; } if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { @@ -1336,7 +1345,7 @@ done: } qib_sendbuf_done(dd, pbufn); if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_wqe) { @@ -1845,6 +1854,23 @@ bail: return ret; } +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) +{ + struct ib_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct qib_qp *qp0; + + memset(&attr, 0, sizeof attr); + attr.dlid = dlid; + attr.port_num = ppd_from_ibp(ibp)->port; + rcu_read_lock(); + qp0 = rcu_dereference(ibp->qp0); + if (qp0) + ah = ib_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + /** * qib_destroy_ah - destroy an address handle * @ibah: the AH to destroy @@ -2060,13 +2086,15 @@ int qib_register_ib_device(struct qib_devdata *dd) spin_lock_init(&dev->lk_table.lock); dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct qib_mregion **) + dev->lk_table.table = (struct qib_mregion __rcu **) __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; } - memset(dev->lk_table.table, 0, lk_tab_size); + RCU_INIT_POINTER(dev->dma_mr, NULL); + for (i = 0; i < dev->lk_table.max; i++) + RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); dev->mmap_offset = PAGE_SIZE; @@ -2289,3 +2317,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd) get_order(lk_tab_size)); kfree(dev->qp_table); } + +/* + * This must be called with s_lock held. + */ +void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + queue_work(ppd->qib_wq, &qp->s_work); + } +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 48760602465..aff8b2c1788 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -302,6 +303,9 @@ struct qib_mregion { u32 max_segs; /* number of qib_segs in all the arrays */ u32 mapsz; /* size of the map array */ u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + struct rcu_head list; atomic_t refcount; struct qib_segarray *map[0]; /* the segments */ }; @@ -416,7 +420,7 @@ struct qib_qp { /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_qp *next; /* link list for QPN hash table */ + struct qib_qp __rcu *next; /* link list for QPN hash table */ struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; struct qib_ib_header *s_hdr; /* next packet header to send */ @@ -646,7 +650,7 @@ struct qib_lkey_table { u32 next; /* next unused index (speeds search) */ u32 gen; /* generation count */ u32 max; /* size of the table */ - struct qib_mregion **table; + struct qib_mregion __rcu **table; }; struct qib_opcode_stats { @@ -655,8 +659,8 @@ struct qib_opcode_stats { }; struct qib_ibport { - struct qib_qp *qp0; - struct qib_qp *qp1; + struct qib_qp __rcu *qp0; + struct qib_qp __rcu *qp1; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct qib_ah *sm_ah; struct qib_ah *smi_ah; @@ -723,12 +727,13 @@ struct qib_ibport { struct qib_opcode_stats opstats[128]; }; + struct qib_ibdev { struct ib_device ibdev; struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct qib_mregion *dma_mr; + struct qib_mregion __rcu *dma_mr; /* QP numbers are shared by all IB ports */ struct qib_qpn_table qpn_table; @@ -739,7 +744,7 @@ struct qib_ibdev { struct list_head memwait; /* list for wait kernel memory */ struct list_head txreq_free; struct timer_list mem_timer; - struct qib_qp **qp_table; + struct qib_qp __rcu **qp_table; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ @@ -832,11 +837,7 @@ extern struct workqueue_struct *qib_cq_wq; /* * This must be called with s_lock held. */ -static inline void qib_schedule_send(struct qib_qp *qp) -{ - if (qib_send_ok(qp)) - queue_work(ib_wq, &qp->s_work); -} +void qib_schedule_send(struct qib_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) { @@ -933,6 +934,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); + void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); @@ -944,9 +947,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp); -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr); +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); +void qib_free_lkey(struct qib_mregion *mr); int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_sge *isge, struct ib_sge *sge, int acc); @@ -1014,6 +1017,29 @@ int qib_unmap_fmr(struct list_head *fmr_list); int qib_dealloc_fmr(struct ib_fmr *ibfmr); +static inline void qib_get_mr(struct qib_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +void mr_rcu_callback(struct rcu_head *list); + +static inline void qib_put_mr(struct qib_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + call_rcu(&mr->list, mr_rcu_callback); +} + +static inline void qib_put_ss(struct qib_sge_state *ss) +{ + while (ss->num_sge) { + qib_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + + void qib_release_mmap_info(struct kref *ref); struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 561b8bca406..1d7281c5a02 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd) u64 atmp; atmp = pioaddr & ~(piolen - 1); if (atmp < addr || (atmp + piolen) > (addr + len)) { - qib_dev_err(dd, "No way to align address/size " - "(%llx/%llx), no WC mtrr\n", - (unsigned long long) atmp, - (unsigned long long) piolen << 1); + qib_dev_err(dd, + "No way to align address/size (%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); ret = -ENODEV; } else { pioaddr = atmp; @@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd) if (cookie < 0) { { qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", + "mtrr_add() WC for PIO bufs failed (%d)\n", cookie); ret = -EINVAL; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 86df632ea61..ca43901ed86 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -92,6 +92,8 @@ enum { IPOIB_STOP_REAPER = 7, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, + IPOIB_STOP_NEIGH_GC = 11, + IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -260,6 +262,20 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_neigh_hash { + struct ipoib_neigh __rcu **buckets; + struct rcu_head rcu; + u32 mask; + u32 size; +}; + +struct ipoib_neigh_table { + struct ipoib_neigh_hash __rcu *htbl; + rwlock_t rwlock; + atomic_t entries; + struct completion flushed; +}; + /* * Device private locking: network stack tx_lock protects members used * in TX fast path, lock protects everything else. lock nests inside @@ -279,6 +295,8 @@ struct ipoib_dev_priv { struct rb_root path_tree; struct list_head path_list; + struct ipoib_neigh_table ntbl; + struct ipoib_mcast *broadcast; struct list_head multicast_list; struct rb_root multicast_tree; @@ -291,7 +309,7 @@ struct ipoib_dev_priv { struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - + struct delayed_work neigh_reap_task; struct ib_device *ca; u8 port; u16 pkey; @@ -377,13 +395,16 @@ struct ipoib_neigh { #ifdef CONFIG_INFINIBAND_IPOIB_CM struct ipoib_cm_tx *cm; #endif - union ib_gid dgid; + u8 daddr[INFINIBAND_ALEN]; struct sk_buff_head queue; - struct neighbour *neighbour; struct net_device *dev; struct list_head list; + struct ipoib_neigh __rcu *hnext; + struct rcu_head rcu; + atomic_t refcnt; + unsigned long alive; }; #define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) @@ -394,21 +415,17 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu) return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; } -/* - * We stash a pointer to our private neighbour information after our - * hardware address in neigh->ha. The ALIGN() expression here makes - * sure that this pointer is stored aligned so that an unaligned - * load is not needed to dereference it. - */ -static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) +void ipoib_neigh_dtor(struct ipoib_neigh *neigh); +static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) { - return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) + - INFINIBAND_ALEN, sizeof(void *)); + if (atomic_dec_and_test(&neigh->refcnt)) + ipoib_neigh_dtor(neigh); } - -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, +struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr); +struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct net_device *dev); -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); +void ipoib_neigh_free(struct ipoib_neigh *neigh); +void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid); extern struct workqueue_struct *ipoib_workqueue; @@ -425,7 +442,6 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) { kref_put(&ah->ref, ipoib_free_ah); } - int ipoib_open(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); @@ -455,7 +471,7 @@ void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); +void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); int ipoib_mcast_start_thread(struct net_device *dev); @@ -517,10 +533,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } -static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) { struct ipoib_dev_priv *priv = netdev_priv(dev); - return IPOIB_CM_SUPPORTED(n->ha) && + return IPOIB_CM_SUPPORTED(hwaddr) && test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } @@ -575,7 +591,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) { return 0; } -static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) { return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43..95ecf4eadf5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -811,9 +811,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -1230,9 +1228,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -1279,7 +1275,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); ipoib_dbg(priv, "Reap connection for gid %pI6\n", - tx->neigh->dgid.raw); + tx->neigh->daddr + 4); tx->neigh = NULL; } } @@ -1304,7 +1300,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; - qpn = IPOIB_QPN(neigh->neighbour->ha); + qpn = IPOIB_QPN(neigh->daddr); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1320,9 +1316,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); } list_del(&p->list); kfree(p); @@ -1376,7 +1370,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); #endif @@ -1397,7 +1391,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3974c290b66..97920b77a5d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -46,7 +46,8 @@ #include <linux/ip.h> #include <linux/in.h> -#include <net/dst.h> +#include <linux/jhash.h> +#include <net/arp.h> MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); @@ -84,6 +85,7 @@ struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); +static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct ib_client ipoib_client = { .name = "ipoib", @@ -264,30 +266,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path) static void path_free(struct net_device *dev, struct ipoib_path *path) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_neigh *neigh, *tn; struct sk_buff *skb; - unsigned long flags; while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); - spin_lock_irqsave(&priv->lock, flags); - - list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { - /* - * It's safe to call ipoib_put_ah() inside priv->lock - * here, because we know that path->ah will always - * hold one more reference, so ipoib_put_ah() will - * never do more than decrement the ref count. - */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - - ipoib_neigh_free(dev, neigh); - } + ipoib_dbg(netdev_priv(dev), "path_free\n"); - spin_unlock_irqrestore(&priv->lock, flags); + /* remove all neigh connected to this path */ + ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); if (path->ah) ipoib_put_ah(path->ah); @@ -458,19 +445,15 @@ static void path_rec_completion(int status, } kref_get(&path->ah->ref); neigh->ah = path->ah; - memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, - sizeof(union ib_gid)); - if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (ipoib_cm_enabled(dev, neigh->daddr)) { if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); continue; } } @@ -555,15 +538,15 @@ static int path_rec_start(struct net_device *dev, return 0; } -/* called with rcu_read_lock */ -static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) +static void neigh_add_path(struct sk_buff *skb, u8 *daddr, + struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; unsigned long flags; - neigh = ipoib_neigh_alloc(n, skb->dev); + neigh = ipoib_neigh_alloc(daddr, dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -572,9 +555,9 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, n->ha + 4); + path = __path_find(dev, daddr + 4); if (!path) { - path = path_rec_create(dev, n->ha + 4); + path = path_rec_create(dev, daddr + 4); if (!path) goto err_path; @@ -586,17 +569,13 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ if (path->ah) { kref_get(&path->ah->ref); neigh->ah = path->ah; - memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, - sizeof(union ib_gid)); - if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (ipoib_cm_enabled(dev, neigh->daddr)) { if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); goto err_drop; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) @@ -608,7 +587,8 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ } } else { spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); + ipoib_neigh_put(neigh); return; } } else { @@ -621,35 +601,20 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ } spin_unlock_irqrestore(&priv->lock, flags); + ipoib_neigh_put(neigh); return; err_list: list_del(&neigh->list); err_path: - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); err_drop: ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); spin_unlock_irqrestore(&priv->lock, flags); -} - -/* called with rcu_read_lock */ -static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(skb->dev); - - /* Look up path record for unicasts */ - if (n->ha[4] != 0xff) { - neigh_add_path(skb, n, dev); - return; - } - - /* Add in the P_Key for multicasts */ - n->ha[8] = (priv->pkey >> 8) & 0xff; - n->ha[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, n->ha + 4, skb); + ipoib_neigh_put(neigh); } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -710,94 +675,80 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct neighbour *n = NULL; + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + struct ipoib_header *header; unsigned long flags; - rcu_read_lock(); - if (likely(skb_dst(skb))) { - n = dst_get_neighbour_noref(skb_dst(skb)); - if (!n) { + header = (struct ipoib_header *) skb->data; + + if (unlikely(cb->hwaddr[4] == 0xff)) { + /* multicast, arrange "if" according to probability */ + if ((header->proto != htons(ETH_P_IP)) && + (header->proto != htons(ETH_P_IPV6)) && + (header->proto != htons(ETH_P_ARP)) && + (header->proto != htons(ETH_P_RARP))) { + /* ethertype not supported by IPoIB */ ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - goto unlock; + return NETDEV_TX_OK; } + /* Add in the P_Key for multicast*/ + cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; + cb->hwaddr[9] = priv->pkey & 0xff; + + neigh = ipoib_neigh_get(dev, cb->hwaddr); + if (likely(neigh)) + goto send_using_neigh; + ipoib_mcast_send(dev, cb->hwaddr, skb); + return NETDEV_TX_OK; } - if (likely(n)) { - if (unlikely(!*to_ipoib_neigh(n))) { - ipoib_path_lookup(skb, n, dev); - goto unlock; - } - - neigh = *to_ipoib_neigh(n); - if (unlikely((memcmp(&neigh->dgid.raw, - n->ha + 4, - sizeof(union ib_gid))) || - (neigh->dev != dev))) { - spin_lock_irqsave(&priv->lock, flags); - /* - * It's safe to call ipoib_put_ah() inside - * priv->lock here, because we know that - * path->ah will always hold one more reference, - * so ipoib_put_ah() will never do more than - * decrement the ref count. - */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - list_del(&neigh->list); - ipoib_neigh_free(dev, neigh); - spin_unlock_irqrestore(&priv->lock, flags); - ipoib_path_lookup(skb, n, dev); - goto unlock; + /* unicast, arrange "switch" according to probability */ + switch (header->proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + neigh = ipoib_neigh_get(dev, cb->hwaddr); + if (unlikely(!neigh)) { + neigh_add_path(skb, cb->hwaddr, dev); + return NETDEV_TX_OK; } + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + /* for unicast ARP and RARP should always perform path find */ + unicast_arp_send(skb, dev, cb); + return NETDEV_TX_OK; + default: + /* ethertype not supported by IPoIB */ + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } - if (ipoib_cm_get(neigh)) { - if (ipoib_cm_up(neigh)) { - ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto unlock; - } - } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); - goto unlock; +send_using_neigh: + /* note we now hold a ref to neigh */ + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto unref; } + } else if (neigh->ah) { + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); + goto unref; + } - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - spin_lock_irqsave(&priv->lock, flags); - __skb_queue_tail(&neigh->queue, skb); - spin_unlock_irqrestore(&priv->lock, flags); - } else { - ++dev->stats.tx_dropped; - dev_kfree_skb_any(skb); - } + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + spin_lock_irqsave(&priv->lock, flags); + __skb_queue_tail(&neigh->queue, skb); + spin_unlock_irqrestore(&priv->lock, flags); } else { - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - - if (cb->hwaddr[4] == 0xff) { - /* Add in the P_Key for multicast*/ - cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; - cb->hwaddr[9] = priv->pkey & 0xff; + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + } - ipoib_mcast_send(dev, cb->hwaddr + 4, skb); - } else { - /* unicast GID -- should be ARP or RARP reply */ - - if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && - (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { - ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", - skb_dst(skb) ? "neigh" : "dst", - be16_to_cpup((__be16 *) skb->data), - IPOIB_QPN(cb->hwaddr), - cb->hwaddr + 4); - dev_kfree_skb_any(skb); - ++dev->stats.tx_dropped; - goto unlock; - } +unref: + ipoib_neigh_put(neigh); - unicast_arp_send(skb, dev, cb); - } - } -unlock: - rcu_read_unlock(); return NETDEV_TX_OK; } @@ -819,6 +770,7 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -826,14 +778,11 @@ static int ipoib_hard_header(struct sk_buff *skb, header->reserved = 0; /* - * If we don't have a dst_entry structure, stuff the + * we don't rely on dst_entry structure, always stuff the * destination address into skb->cb so we can figure out where * to send the packet later. */ - if (!skb_dst(skb)) { - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); - } + memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); return 0; } @@ -850,86 +799,438 @@ static void ipoib_set_mcast_list(struct net_device *dev) queue_work(ipoib_workqueue, &priv->restart_task); } -static void ipoib_neigh_cleanup(struct neighbour *n) +static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) { - struct ipoib_neigh *neigh; - struct ipoib_dev_priv *priv = netdev_priv(n->dev); + /* + * Use only the address parts that contributes to spreading + * The subnet prefix is not used as one can not connect to + * same remote port (GUID) using the same remote QPN via two + * different subnets. + */ + /* qpn octets[1:4) & port GUID octets[12:20) */ + u32 *daddr_32 = (u32 *) daddr; + u32 hv; + + hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); + return hv & htbl->mask; +} + +struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh *neigh = NULL; + u32 hash_val; + + rcu_read_lock_bh(); + + htbl = rcu_dereference_bh(ntbl->htbl); + + if (!htbl) + goto out_unlock; + + hash_val = ipoib_addr_hash(htbl, daddr); + for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]); + neigh != NULL; + neigh = rcu_dereference_bh(neigh->hnext)) { + if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { + /* found, take one ref on behalf of the caller */ + if (!atomic_inc_not_zero(&neigh->refcnt)) { + /* deleted */ + neigh = NULL; + goto out_unlock; + } + neigh->alive = jiffies; + goto out_unlock; + } + } + +out_unlock: + rcu_read_unlock_bh(); + return neigh; +} + +static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) +{ + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long neigh_obsolete; + unsigned long dt; unsigned long flags; - struct ipoib_ah *ah = NULL; + int i; - neigh = *to_ipoib_neigh(n); - if (neigh) - priv = netdev_priv(neigh->dev); - else + if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; - ipoib_dbg(priv, - "neigh_cleanup for %06x %pI6\n", - IPOIB_QPN(n->ha), - n->ha + 4); - spin_lock_irqsave(&priv->lock, flags); + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + + if (!htbl) + goto out_unlock; + + /* neigh is obsolete if it was idle for two GC periods */ + dt = 2 * arp_tbl.gc_interval; + neigh_obsolete = jiffies - dt; + /* handle possible race condition */ + if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) + goto out_unlock; + + for (i = 0; i < htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + /* was the neigh idle for two GC periods */ + if (time_after(neigh_obsolete, neigh->alive)) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from path/mc list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } else { + np = &neigh->hnext; + } - if (neigh->ah) - ah = neigh->ah; - list_del(&neigh->list); - ipoib_neigh_free(n->dev, neigh); + } + } - spin_unlock_irqrestore(&priv->lock, flags); +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} - if (ah) - ipoib_put_ah(ah); +static void ipoib_reap_neigh(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, neigh_reap_task.work); + + __ipoib_reap_neigh(priv); + + if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) + queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + arp_tbl.gc_interval); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, + +static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, struct net_device *dev) { struct ipoib_neigh *neigh; - neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); if (!neigh) return NULL; - neigh->neighbour = neighbour; neigh->dev = dev; - memset(&neigh->dgid.raw, 0, sizeof (union ib_gid)); - *to_ipoib_neigh(neighbour) = neigh; + memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); skb_queue_head_init(&neigh->queue); + INIT_LIST_HEAD(&neigh->list); ipoib_cm_set(neigh, NULL); + /* one ref on behalf of the caller */ + atomic_set(&neigh->refcnt, 1); + + return neigh; +} + +struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, + struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh *neigh; + u32 hash_val; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) { + neigh = NULL; + goto out_unlock; + } + + /* need to add a new neigh, but maybe some other thread succeeded? + * recalc hash, maybe hash resize took place so we do a search + */ + hash_val = ipoib_addr_hash(htbl, daddr); + for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], + lockdep_is_held(&ntbl->rwlock)); + neigh != NULL; + neigh = rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))) { + if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { + /* found, take one ref on behalf of the caller */ + if (!atomic_inc_not_zero(&neigh->refcnt)) { + /* deleted */ + neigh = NULL; + break; + } + neigh->alive = jiffies; + goto out_unlock; + } + } + + neigh = ipoib_neigh_ctor(daddr, dev); + if (!neigh) + goto out_unlock; + + /* one ref on behalf of the hash table */ + atomic_inc(&neigh->refcnt); + neigh->alive = jiffies; + /* put in hash */ + rcu_assign_pointer(neigh->hnext, + rcu_dereference_protected(htbl->buckets[hash_val], + lockdep_is_held(&ntbl->rwlock))); + rcu_assign_pointer(htbl->buckets[hash_val], neigh); + atomic_inc(&ntbl->entries); + +out_unlock: + write_unlock_bh(&ntbl->rwlock); return neigh; } -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) +void ipoib_neigh_dtor(struct ipoib_neigh *neigh) { + /* neigh reference count was dropprd to zero */ + struct net_device *dev = neigh->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - *to_ipoib_neigh(neigh->neighbour) = NULL; + if (neigh->ah) + ipoib_put_ah(neigh->ah); while ((skb = __skb_dequeue(&neigh->queue))) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } if (ipoib_cm_get(neigh)) ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); + ipoib_dbg(netdev_priv(dev), + "neigh free for %06x %pI6\n", + IPOIB_QPN(neigh->daddr), + neigh->daddr + 4); kfree(neigh); + if (atomic_dec_and_test(&priv->ntbl.entries)) { + if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags)) + complete(&priv->ntbl.flushed); + } +} + +static void ipoib_neigh_reclaim(struct rcu_head *rp) +{ + /* Called as a result of removal from hash table */ + struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu); + /* note TX context may hold another ref */ + ipoib_neigh_put(neigh); } -static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) +void ipoib_neigh_free(struct ipoib_neigh *neigh) { - parms->neigh_cleanup = ipoib_neigh_cleanup; + struct net_device *dev = neigh->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh __rcu **np; + struct ipoib_neigh *n; + u32 hash_val; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) + goto out_unlock; + + hash_val = ipoib_addr_hash(htbl, neigh->daddr); + np = &htbl->buckets[hash_val]; + for (n = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock)); + n != NULL; + n = rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))) { + if (n == neigh) { + /* found */ + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + goto out_unlock; + } else { + np = &n->hnext; + } + } + +out_unlock: + write_unlock_bh(&ntbl->rwlock); + +} + +static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) +{ + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh **buckets; + u32 size; + + clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); + ntbl->htbl = NULL; + rwlock_init(&ntbl->rwlock); + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return -ENOMEM; + set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + size = roundup_pow_of_two(arp_tbl.gc_thresh3); + buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL); + if (!buckets) { + kfree(htbl); + return -ENOMEM; + } + htbl->size = size; + htbl->mask = (size - 1); + htbl->buckets = buckets; + ntbl->htbl = htbl; + atomic_set(&ntbl->entries, 0); + + /* start garbage collection */ + clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + arp_tbl.gc_interval); return 0; } +static void neigh_hash_free_rcu(struct rcu_head *head) +{ + struct ipoib_neigh_hash *htbl = container_of(head, + struct ipoib_neigh_hash, + rcu); + struct ipoib_neigh __rcu **buckets = htbl->buckets; + + kfree(buckets); + kfree(htbl); +} + +void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long flags; + int i; + + /* remove all neigh connected to a given path or mcast */ + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + + if (!htbl) + goto out_unlock; + + for (i = 0; i < htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + /* delete neighs belong to this parent */ + if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from parent list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } else { + np = &neigh->hnext; + } + + } + } +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} + +static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) +{ + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long flags; + int i; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) + goto out_unlock; + + for (i = 0; i < htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from path/mc list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } + } + + rcu_assign_pointer(ntbl->htbl, NULL); + call_rcu(&htbl->rcu, neigh_hash_free_rcu); + +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} + +static void ipoib_neigh_hash_uninit(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int stopped; + + ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); + init_completion(&priv->ntbl.flushed); + set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); + + /* Stop GC if called at init fail need to cancel work */ + stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + if (!stopped) + cancel_delayed_work(&priv->neigh_reap_task); + + if (atomic_read(&priv->ntbl.entries)) { + ipoib_flush_neighs(priv); + wait_for_completion(&priv->ntbl.flushed); + } +} + + int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) { struct ipoib_dev_priv *priv = netdev_priv(dev); + if (ipoib_neigh_hash_init(priv) < 0) + goto out; /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); - goto out; + goto out_neigh_hash_cleanup; } priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); @@ -952,6 +1253,8 @@ out_tx_ring_cleanup: out_rx_ring_cleanup: kfree(priv->rx_ring); +out_neigh_hash_cleanup: + ipoib_neigh_hash_uninit(dev); out: return -ENOMEM; } @@ -964,6 +1267,9 @@ void ipoib_dev_cleanup(struct net_device *dev) /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { + /* Stop GC on child */ + set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); + cancel_delayed_work(&cpriv->neigh_reap_task); unregister_netdev(cpriv->dev); ipoib_dev_cleanup(cpriv->dev); free_netdev(cpriv->dev); @@ -976,6 +1282,8 @@ void ipoib_dev_cleanup(struct net_device *dev) priv->rx_ring = NULL; priv->tx_ring = NULL; + + ipoib_neigh_hash_uninit(dev); } static const struct header_ops ipoib_header_ops = { @@ -990,7 +1298,6 @@ static const struct net_device_ops ipoib_netdev_ops = { .ndo_start_xmit = ipoib_start_xmit, .ndo_tx_timeout = ipoib_timeout, .ndo_set_rx_mode = ipoib_set_mcast_list, - .ndo_neigh_setup = ipoib_neigh_setup_dev, }; static void ipoib_setup(struct net_device *dev) @@ -1039,6 +1346,7 @@ static void ipoib_setup(struct net_device *dev) INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); + INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) @@ -1279,6 +1587,9 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); + /* Stop GC if started before flush */ + set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + cancel_delayed_work(&priv->neigh_reap_task); flush_workqueue(ipoib_workqueue); event_failed: @@ -1345,6 +1656,9 @@ static void ipoib_remove_one(struct ib_device *device) dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); + /* Stop GC */ + set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + cancel_delayed_work(&priv->neigh_reap_task); flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 20ebc6fd1bb..13f4aa7593c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -69,28 +69,13 @@ struct ipoib_mcast_iter { static void ipoib_mcast_free(struct ipoib_mcast *mcast) { struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_neigh *neigh, *tmp; int tx_dropped = 0; ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); - spin_lock_irq(&priv->lock); - - list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { - /* - * It's safe to call ipoib_put_ah() inside priv->lock - * here, because we know that mcast->ah will always - * hold one more reference, so ipoib_put_ah() will - * never do more than decrement the ref count. - */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); - } - - spin_unlock_irq(&priv->lock); + /* remove all neigh connected to this mcast */ + ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -655,11 +640,12 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) return 0; } -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) +void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; unsigned long flags; + void *mgid = daddr + 4; spin_lock_irqsave(&priv->lock, flags); @@ -715,25 +701,25 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = NULL; - - rcu_read_lock(); - if (dst) - n = dst_get_neighbour_noref(dst); - if (n && !*to_ipoib_neigh(n)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(n, - skb->dev); + struct ipoib_neigh *neigh; + spin_unlock_irqrestore(&priv->lock, flags); + neigh = ipoib_neigh_get(dev, daddr); + spin_lock_irqsave(&priv->lock, flags); + if (!neigh) { + spin_unlock_irqrestore(&priv->lock, flags); + neigh = ipoib_neigh_alloc(daddr, dev); + spin_lock_irqsave(&priv->lock, flags); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); } } - rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + if (neigh) + ipoib_neigh_put(neigh); return; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5f6b7f63cde..7a0ce8d4288 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1377,10 +1377,14 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) break; case SRPT_STATE_NEED_DATA: /* DMA_TO_DEVICE (write) - RDMA read error. */ + + /* XXX(hch): this is a horrible layering violation.. */ spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); ioctx->cmd.transport_state |= CMD_T_LUN_STOP; + ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); - transport_generic_handle_data(&ioctx->cmd); + + complete(&ioctx->cmd.transport_lun_stop_comp); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1463,9 +1467,10 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, /** * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. * - * Note: transport_generic_handle_data() is asynchronous so unmapping the - * data that has been transferred via IB RDMA must be postponed until the - * check_stop_free() callback. + * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping + * the data that has been transferred via IB RDMA had to be postponed until the + * check_stop_free() callback. None of this is nessecary anymore and needs to + * be cleaned up. */ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, @@ -1477,7 +1482,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, if (opcode == SRPT_RDMA_READ_LAST) { if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, SRPT_STATE_DATA_IN)) - transport_generic_handle_data(&ioctx->cmd); + target_execute_cmd(&ioctx->cmd); else printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, __LINE__, srpt_get_cmd_state(ioctx)); |