diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r-- | drivers/infiniband/hw/mlx4/ah.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/alias_GUID.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/cq.c | 57 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mad.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/main.c | 246 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mlx4_ib.h | 26 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mr.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 90 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/srq.c | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/sysfs.c | 6 |
10 files changed, 395 insertions, 50 deletions
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 2d8c3397774..f50a546224a 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include <linux/inet.h> #include <linux/string.h> +#include <linux/mlx4/driver.h> #include "mlx4_ib.h" diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c4141..a31e031afd8 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_vfs + 1) + if (slave_id >= dev->dev->persist->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a3b70f6c403..543ecdd8667 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -188,6 +188,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + INIT_LIST_HEAD(&cq->send_qp_list); + INIT_LIST_HEAD(&cq->recv_qp_list); if (context) { struct mlx4_ib_create_cq ucmd; @@ -594,6 +596,55 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct return 0; } +static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled, int is_send) +{ + struct mlx4_ib_wq *wq; + unsigned cur; + int i; + + wq = is_send ? &qp->sq : &qp->rq; + cur = wq->head - wq->tail; + + if (cur == 0) + return; + + for (i = 0; i < cur && *npolled < num_entries; i++) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX4_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + (*npolled)++; + wc->qp = &qp->ibqp; + wc++; + } +} + +static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx4_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and retrun + * simulated FLUSH_ERR completions + */ + list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { + mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1); + if (*npolled >= num_entries) + goto out; + } + + list_for_each_entry(qp, &cq->recv_qp_list, cq_recv_list) { + mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 0); + if (*npolled >= num_entries) + goto out; + } + +out: + return; +} + static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_qp **cur_qp, struct ib_wc *wc) @@ -836,8 +887,13 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int npolled; int err = 0; + struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + goto out; + } for (npolled = 0; npolled < num_entries; ++npolled) { err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); @@ -847,6 +903,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) mlx4_cq_set_ci(&cq->mcq); +out: spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 82a7dd87089..c7619716c31 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->ib_dev = &dev->ib_dev; for (i = 0; - i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i < min(dev->dev->caps.sqp_demux, + (u16)(dev->dev->persist->num_vfs + 1)); i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev->dev, i); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 9117b7a2d5f..eb8e215f161 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = dev->dev->persist->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; int err = 0; + int is_bonded = mlx4_is_bonded(mdev->dev); mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) @@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; + if (is_bonded) + rtnl_lock(); /* required to get upper dev */ spin_lock_bh(&iboe->lock); ndev = iboe->netdevs[port - 1]; + if (ndev && is_bonded) + ndev = netdev_master_upper_dev_get(ndev); if (!ndev) goto out_unlock; @@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock_bh(&iboe->lock); + if (is_bonded) + rtnl_unlock(); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; @@ -844,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct mlx4_ib_steering { struct list_head list; - u64 reg_id; + struct mlx4_flow_reg_id reg_id; union ib_gid gid; }; @@ -1135,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { - int err = 0, i = 0; + int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; enum mlx4_net_trans_promisc_mode type[2]; + struct mlx4_dev *dev = (to_mdev(qp->device))->dev; + int is_bonded = mlx4_is_bonded(dev); memset(type, 0, sizeof(type)); @@ -1172,26 +1181,58 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, while (i < ARRAY_SIZE(type) && type[i]) { err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], - &mflow->reg_id[i]); + &mflow->reg_id[i].id); if (err) goto err_create_flow; i++; + if (is_bonded) { + /* Application always sees one port so the mirror rule + * must be on port #2 + */ + flow_attr->port = 2; + err = __mlx4_ib_create_flow(qp, flow_attr, + domain, type[j], + &mflow->reg_id[j].mirror); + flow_attr->port = 1; + if (err) + goto err_create_flow; + j++; + } + } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { - err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]); + err = mlx4_ib_tunnel_steer_add(qp, flow_attr, + &mflow->reg_id[i].id); if (err) goto err_create_flow; i++; + if (is_bonded) { + flow_attr->port = 2; + err = mlx4_ib_tunnel_steer_add(qp, flow_attr, + &mflow->reg_id[j].mirror); + flow_attr->port = 1; + if (err) + goto err_create_flow; + j++; + } + /* function to create mirror rule */ } return &mflow->ibflow; err_create_flow: while (i) { - (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]); + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, + mflow->reg_id[i].id); i--; } + + while (j) { + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, + mflow->reg_id[j].mirror); + j--; + } err_free: kfree(mflow); return ERR_PTR(err); @@ -1204,10 +1245,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); struct mlx4_ib_flow *mflow = to_mflow(flow_id); - while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { - err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); + while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) { + err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id); if (err) ret = err; + if (mflow->reg_id[i].mirror) { + err = __mlx4_ib_destroy_flow(mdev->dev, + mflow->reg_id[i].mirror); + if (err) + ret = err; + } i++; } @@ -1219,11 +1266,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_dev *dev = mdev->dev; struct mlx4_ib_qp *mqp = to_mqp(ibqp); - u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; + struct mlx4_flow_reg_id reg_id; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { @@ -1235,10 +1283,21 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - prot, ®_id); + prot, ®_id.id); if (err) goto err_malloc; + reg_id.mirror = 0; + if (mlx4_is_bonded(dev)) { + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, + (mqp->port == 1) ? 2 : 1, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + prot, ®_id.mirror); + if (err) + goto err_add; + } + err = add_gid_entry(ibqp, gid); if (err) goto err_add; @@ -1254,7 +1313,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err_add: mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - prot, reg_id); + prot, reg_id.id); + if (reg_id.mirror) + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id.mirror); err_malloc: kfree(ib_steering); @@ -1281,10 +1343,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_dev *dev = mdev->dev; struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct net_device *ndev; struct mlx4_ib_gid_entry *ge; - u64 reg_id = 0; + struct mlx4_flow_reg_id reg_id = {0, 0}; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; @@ -1309,10 +1373,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - prot, reg_id); + prot, reg_id.id); if (err) return err; + if (mlx4_is_bonded(dev)) { + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id.mirror); + if (err) + return err; + } + mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { @@ -1376,7 +1447,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->dev->pdev->device); + return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -1440,6 +1511,7 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; + int is_bonded = mlx4_is_bonded(dev); if (!gw->dev->ib_active) return; @@ -1459,7 +1531,10 @@ static void update_gids_task(struct work_struct *work) if (err) pr_warn("set port command failed\n"); else - mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); + if ((gw->port == 1) || !is_bonded) + mlx4_ib_dispatch_event(gw->dev, + is_bonded ? 1 : gw->port, + IB_EVENT_GID_CHANGE); mlx4_free_cmd_mailbox(dev, mailbox); kfree(gw); @@ -1875,7 +1950,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, * don't want the bond IP based gids in the table since * flows that select port by gid may get the down port. */ - if (port_state == IB_PORT_DOWN) { + if (port_state == IB_PORT_DOWN && + !mlx4_is_bonded(ibdev->dev)) { reset_gid_table(ibdev, port); mlx4_ib_set_default_gid(ibdev, curr_netdev, @@ -1938,7 +2014,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) int i; if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (slave = 0; slave <= ibdev->dev->persist->num_vfs; + ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; @@ -1995,7 +2072,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + i, j, dev->persist->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2046,6 +2123,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int err; struct mlx4_ib_iboe *iboe; int ib_num_ports = 0; + int num_req_counters; pr_info_once("%s", mlx4_ib_version); @@ -2059,7 +2137,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { - dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "Device struct alloc failed\n"); return NULL; } @@ -2078,15 +2157,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); ibdev->dev = dev; + ibdev->bond_next_port = 0; strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; - ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; + ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ? + 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; - ibdev->ib_dev.dma_device = &dev->pdev->dev; + ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2205,7 +2286,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; - for (i = 0; i < ibdev->num_ports; ++i) { + num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; + for (i = 0; i < num_req_counters; ++i) { mutex_init(&ibdev->qp1_proxy_lock[i]); if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { @@ -2216,12 +2298,18 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->counters[i] = -1; } } + if (mlx4_is_bonded(dev)) + for (i = 1; i < ibdev->num_ports ; ++i) + ibdev->counters[i] = ibdev->counters[0]; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ib_num_ports++; spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); + INIT_LIST_HEAD(&ibdev->qp_list); + spin_lock_init(&ibdev->reset_flow_resource_lock); if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && ib_num_ports) { @@ -2237,7 +2325,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "bit map alloc failed\n"); goto err_steer_qp_release; } @@ -2535,6 +2624,99 @@ out: return; } +static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev) +{ + struct mlx4_ib_qp *mqp; + unsigned long flags_qp; + unsigned long flags_cq; + struct mlx4_ib_cq *send_mcq, *recv_mcq; + struct list_head cq_notify_list; + struct mlx4_cq *mcq; + unsigned long flags; + + pr_warn("mlx4_ib_handle_catas_error was started\n"); + INIT_LIST_HEAD(&cq_notify_list); + + /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ + spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); + + list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { + spin_lock_irqsave(&mqp->sq.lock, flags_qp); + if (mqp->sq.tail != mqp->sq.head) { + send_mcq = to_mcq(mqp->ibqp.send_cq); + spin_lock_irqsave(&send_mcq->lock, flags_cq); + if (send_mcq->mcq.comp && + mqp->ibqp.send_cq->comp_handler) { + if (!send_mcq->mcq.reset_notify_added) { + send_mcq->mcq.reset_notify_added = 1; + list_add_tail(&send_mcq->mcq.reset_notify, + &cq_notify_list); + } + } + spin_unlock_irqrestore(&send_mcq->lock, flags_cq); + } + spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); + /* Now, handle the QP's receive queue */ + spin_lock_irqsave(&mqp->rq.lock, flags_qp); + /* no handling is needed for SRQ */ + if (!mqp->ibqp.srq) { + if (mqp->rq.tail != mqp->rq.head) { + recv_mcq = to_mcq(mqp->ibqp.recv_cq); + spin_lock_irqsave(&recv_mcq->lock, flags_cq); + if (recv_mcq->mcq.comp && + mqp->ibqp.recv_cq->comp_handler) { + if (!recv_mcq->mcq.reset_notify_added) { + recv_mcq->mcq.reset_notify_added = 1; + list_add_tail(&recv_mcq->mcq.reset_notify, + &cq_notify_list); + } + } + spin_unlock_irqrestore(&recv_mcq->lock, + flags_cq); + } + } + spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); + } + + list_for_each_entry(mcq, &cq_notify_list, reset_notify) { + mcq->comp(mcq); + } + spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); + pr_warn("mlx4_ib_handle_catas_error ended\n"); +} + +static void handle_bonded_port_state_event(struct work_struct *work) +{ + struct ib_event_work *ew = + container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *ibdev = ew->ib_dev; + enum ib_port_state bonded_port_state = IB_PORT_NOP; + int i; + struct ib_event ibev; + + kfree(ew); + spin_lock_bh(&ibdev->iboe.lock); + for (i = 0; i < MLX4_MAX_PORTS; ++i) { + struct net_device *curr_netdev = ibdev->iboe.netdevs[i]; + + enum ib_port_state curr_port_state = + (netif_running(curr_netdev) && + netif_carrier_ok(curr_netdev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + + bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ? + curr_port_state : IB_PORT_ACTIVE; + } + spin_unlock_bh(&ibdev->iboe.lock); + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = 1; + ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + + ib_dispatch_event(&ibev); +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -2544,6 +2726,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, struct ib_event_work *ew; int p = 0; + if (mlx4_is_bonded(dev) && + ((event == MLX4_DEV_EVENT_PORT_UP) || + (event == MLX4_DEV_EVENT_PORT_DOWN))) { + ew = kmalloc(sizeof(*ew), GFP_ATOMIC); + if (!ew) + return; + INIT_WORK(&ew->work, handle_bonded_port_state_event); + ew->ib_dev = ibdev; + queue_work(wq, &ew->work); + return; + } + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else @@ -2570,6 +2764,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; + mlx4_ib_handle_catas_error(ibdev); break; case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: @@ -2604,7 +2799,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } ibev.device = ibdev_ptr; - ibev.element.port_num = (u8) p; + ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p; ib_dispatch_event(&ibev); } @@ -2613,7 +2808,8 @@ static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, - .protocol = MLX4_PROT_IB_IPV6 + .protocol = MLX4_PROT_IB_IPV6, + .flags = MLX4_INTFF_BONDING }; static int __init mlx4_ib_init(void) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6eb743f65f6..f829fd935b7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -110,6 +110,9 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + /* List of qps that it serves.*/ + struct list_head send_qp_list; + struct list_head recv_qp_list; }; struct mlx4_ib_mr { @@ -134,10 +137,17 @@ struct mlx4_ib_fmr { struct mlx4_fmr mfmr; }; +#define MAX_REGS_PER_FLOW 2 + +struct mlx4_flow_reg_id { + u64 id; + u64 mirror; +}; + struct mlx4_ib_flow { struct ib_flow ibflow; /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ - u64 reg_id[2]; + struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW]; }; struct mlx4_ib_wq { @@ -293,6 +303,9 @@ struct mlx4_ib_qp { struct mlx4_roce_smac_vlan_info pri; struct mlx4_roce_smac_vlan_info alt; u64 reg_id; + struct list_head qps_list; + struct list_head cq_recv_list; + struct list_head cq_send_list; }; struct mlx4_ib_srq { @@ -527,6 +540,10 @@ struct mlx4_ib_dev { struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; /* lock when destroying qp1_proxy and getting netdev events */ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; + u8 bond_next_port; + /* protect resources needed as part of reset flow */ + spinlock_t reset_flow_resource_lock; + struct list_head qp_list; }; struct ib_event_work { @@ -622,6 +639,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } +static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) +{ + dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports; + + return dev->bond_next_port + 1; +} + int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c36ccbd9a64..e0d271782d0 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device if (!mfrpl->ibfrpl.page_list) goto err_free; - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> + pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); - dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->dev->persist->pdev->dev, size, + mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index cf000b7ad64..dfc6ca128a7 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -40,11 +40,17 @@ #include <rdma/ib_addr.h> #include <rdma/ib_mad.h> +#include <linux/mlx4/driver.h> #include <linux/mlx4/qp.h> #include "mlx4_ib.h" #include "user.h" +static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, + struct mlx4_ib_cq *recv_cq); +static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, + struct mlx4_ib_cq *recv_cq); + enum { MLX4_IB_ACK_REQ_FREQ = 8, }; @@ -93,17 +99,6 @@ enum { #ifndef ETH_ALEN #define ETH_ALEN 6 #endif -static inline u64 mlx4_mac_to_u64(u8 *addr) -{ - u64 mac = 0; - int i; - - for (i = 0; i < ETH_ALEN; i++) { - mac <<= 8; - mac |= addr[i]; - } - return mac; -} static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), @@ -628,6 +623,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; + struct mlx4_ib_cq *mcq; + unsigned long flags; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -838,6 +835,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->mqp.event = mlx4_ib_qp_event; if (!*caller_qp) *caller_qp = qp; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + /* Maintain device to QPs access, needed for further handling + * via reset flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling + * via reset flow + */ + mcq = to_mcq(init_attr->send_cq); + list_add_tail(&qp->cq_send_list, &mcq->send_qp_list); + mcq = to_mcq(init_attr->recv_cq); + list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list); + mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); return 0; err_qpn: @@ -896,13 +911,13 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { if (send_cq == recv_cq) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } @@ -912,13 +927,13 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re { if (send_cq == recv_cq) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } @@ -963,6 +978,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; + unsigned long flags; if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), @@ -994,8 +1010,13 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, get_cqs(qp, &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); + /* del from lists under both locks above to protect reset flow paths */ + list_del(&qp->qps_list); + list_del(&qp->cq_send_list); + list_del(&qp->cq_recv_list); if (!is_user) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL); @@ -1006,6 +1027,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_remove(dev->dev, &qp->mqp); mlx4_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); mlx4_qp_free(dev->dev, &qp->mqp); @@ -1915,6 +1937,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { + if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { + if ((ibqp->qp_type == IB_QPT_RC) || + (ibqp->qp_type == IB_QPT_UD) || + (ibqp->qp_type == IB_QPT_UC) || + (ibqp->qp_type == IB_QPT_RAW_PACKET) || + (ibqp->qp_type == IB_QPT_XRC_INI)) { + attr->port_num = mlx4_ib_bond_next_port(dev); + } + } else { + /* no sense in changing port_num + * when ports are bonded */ + attr_mask &= ~IB_QP_PORT; + } + } + if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " @@ -1965,6 +2003,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) + attr->port_num = 1; + out: mutex_unlock(&qp->mutex); return err; @@ -2609,8 +2650,15 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); spin_lock_irqsave(&qp->sq.lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } ind = qp->sq_next_wqe; @@ -2908,10 +2956,18 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int max_gs; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 62d9285300a..dce5dfe3a70 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -316,8 +316,15 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, int err = 0; int nreq; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device); spin_lock_irqsave(&srq->lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->msrq.max_gs)) { @@ -362,6 +369,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, *srq->db.db = cpu_to_be32(srq->wqe_ctr); } +out: spin_unlock_irqrestore(&srq->lock, flags); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index cb4c66e723b..d10c2b8a5da 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); + strlcpy(name, pci_name(dev->dev->persist->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return 0; - for (i = 0; i <= device->dev->num_vfs; ++i) + for (i = 0; i <= device->dev->persist->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return; - for (slave = device->dev->num_vfs; slave >= 0; --slave) { + for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { |