summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx4
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c1
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c2
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c57
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c246
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h26
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c6
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c90
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c8
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c6
10 files changed, 395 insertions, 50 deletions
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 2d8c3397774..f50a546224a 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
+#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 0eb141c4141..a31e031afd8 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
- if (slave_id >= dev->dev->num_vfs + 1)
+ if (slave_id >= dev->dev->persist->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index a3b70f6c403..543ecdd8667 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -188,6 +188,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ INIT_LIST_HEAD(&cq->send_qp_list);
+ INIT_LIST_HEAD(&cq->recv_qp_list);
if (context) {
struct mlx4_ib_create_cq ucmd;
@@ -594,6 +596,55 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
return 0;
}
+static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
+ struct ib_wc *wc, int *npolled, int is_send)
+{
+ struct mlx4_ib_wq *wq;
+ unsigned cur;
+ int i;
+
+ wq = is_send ? &qp->sq : &qp->rq;
+ cur = wq->head - wq->tail;
+
+ if (cur == 0)
+ return;
+
+ for (i = 0; i < cur && *npolled < num_entries; i++) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = MLX4_CQE_SYNDROME_WR_FLUSH_ERR;
+ wq->tail++;
+ (*npolled)++;
+ wc->qp = &qp->ibqp;
+ wc++;
+ }
+}
+
+static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx4_ib_qp *qp;
+
+ *npolled = 0;
+ /* Find uncompleted WQEs belonging to that cq and retrun
+ * simulated FLUSH_ERR completions
+ */
+ list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) {
+ mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1);
+ if (*npolled >= num_entries)
+ goto out;
+ }
+
+ list_for_each_entry(qp, &cq->recv_qp_list, cq_recv_list) {
+ mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 0);
+ if (*npolled >= num_entries)
+ goto out;
+ }
+
+out:
+ return;
+}
+
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -836,8 +887,13 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
unsigned long flags;
int npolled;
int err = 0;
+ struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
spin_lock_irqsave(&cq->lock, flags);
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx4_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+ goto out;
+ }
for (npolled = 0; npolled < num_entries; ++npolled) {
err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
@@ -847,6 +903,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
mlx4_cq_set_ci(&cq->mcq);
+out:
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 82a7dd87089..c7619716c31 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->ib_dev = &dev->ib_dev;
for (i = 0;
- i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i < min(dev->dev->caps.sqp_demux,
+ (u16)(dev->dev->persist->num_vfs + 1));
i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev->dev, i);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 9117b7a2d5f..eb8e215f161 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = dev->dev->pdev->device;
+ props->vendor_part_id = dev->dev->persist->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
@@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
int err = 0;
+ int is_bonded = mlx4_is_bonded(mdev->dev);
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
@@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
+ if (is_bonded)
+ rtnl_lock(); /* required to get upper dev */
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
+ if (ndev && is_bonded)
+ ndev = netdev_master_upper_dev_get(ndev);
if (!ndev)
goto out_unlock;
@@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
+ if (is_bonded)
+ rtnl_unlock();
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -844,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
struct mlx4_ib_steering {
struct list_head list;
- u64 reg_id;
+ struct mlx4_flow_reg_id reg_id;
union ib_gid gid;
};
@@ -1135,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
- int err = 0, i = 0;
+ int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
enum mlx4_net_trans_promisc_mode type[2];
+ struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
+ int is_bonded = mlx4_is_bonded(dev);
memset(type, 0, sizeof(type));
@@ -1172,26 +1181,58 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
while (i < ARRAY_SIZE(type) && type[i]) {
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i]);
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ /* Application always sees one port so the mirror rule
+ * must be on port #2
+ */
+ flow_attr->port = 2;
+ err = __mlx4_ib_create_flow(qp, flow_attr,
+ domain, type[j],
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+
}
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
i++;
+ if (is_bonded) {
+ flow_attr->port = 2;
+ err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
+ &mflow->reg_id[j].mirror);
+ flow_attr->port = 1;
+ if (err)
+ goto err_create_flow;
+ j++;
+ }
+ /* function to create mirror rule */
}
return &mflow->ibflow;
err_create_flow:
while (i) {
- (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[i].id);
i--;
}
+
+ while (j) {
+ (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
+ mflow->reg_id[j].mirror);
+ j--;
+ }
err_free:
kfree(mflow);
return ERR_PTR(err);
@@ -1204,10 +1245,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
- while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
- err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
if (err)
ret = err;
+ if (mflow->reg_id[i].mirror) {
+ err = __mlx4_ib_destroy_flow(mdev->dev,
+ mflow->reg_id[i].mirror);
+ if (err)
+ ret = err;
+ }
i++;
}
@@ -1219,11 +1266,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
+ struct mlx4_flow_reg_id reg_id;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1235,10 +1283,21 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- prot, &reg_id);
+ prot, &reg_id.id);
if (err)
goto err_malloc;
+ reg_id.mirror = 0;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
+ (mqp->port == 1) ? 2 : 1,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ prot, &reg_id.mirror);
+ if (err)
+ goto err_add;
+ }
+
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
@@ -1254,7 +1313,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
+ if (reg_id.mirror)
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
err_malloc:
kfree(ib_steering);
@@ -1281,10 +1343,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
- u64 reg_id = 0;
+ struct mlx4_flow_reg_id reg_id = {0, 0};
+
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
@@ -1309,10 +1373,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- prot, reg_id);
+ prot, reg_id.id);
if (err)
return err;
+ if (mlx4_is_bonded(dev)) {
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id.mirror);
+ if (err)
+ return err;
+ }
+
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
@@ -1376,7 +1447,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
+ return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
@@ -1440,6 +1511,7 @@ static void update_gids_task(struct work_struct *work)
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
+ int is_bonded = mlx4_is_bonded(dev);
if (!gw->dev->ib_active)
return;
@@ -1459,7 +1531,10 @@ static void update_gids_task(struct work_struct *work)
if (err)
pr_warn("set port command failed\n");
else
- mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+ if ((gw->port == 1) || !is_bonded)
+ mlx4_ib_dispatch_event(gw->dev,
+ is_bonded ? 1 : gw->port,
+ IB_EVENT_GID_CHANGE);
mlx4_free_cmd_mailbox(dev, mailbox);
kfree(gw);
@@ -1875,7 +1950,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
* don't want the bond IP based gids in the table since
* flows that select port by gid may get the down port.
*/
- if (port_state == IB_PORT_DOWN) {
+ if (port_state == IB_PORT_DOWN &&
+ !mlx4_is_bonded(ibdev->dev)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev,
@@ -1938,7 +2014,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
int i;
if (mlx4_is_master(ibdev->dev)) {
- for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+ for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
+ ++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
@@ -1995,7 +2072,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
- i, j, dev->pdev->bus->name);
+ i, j, dev->persist->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
@@ -2046,6 +2123,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int err;
struct mlx4_ib_iboe *iboe;
int ib_num_ports = 0;
+ int num_req_counters;
pr_info_once("%s", mlx4_ib_version);
@@ -2059,7 +2137,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
- dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
+ dev_err(&dev->persist->pdev->dev,
+ "Device struct alloc failed\n");
return NULL;
}
@@ -2078,15 +2157,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
+ ibdev->bond_next_port = 0;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
- ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
+ ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
+ 1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->pdev->dev;
+ ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2205,7 +2286,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
- for (i = 0; i < ibdev->num_ports; ++i) {
+ num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
+ for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
@@ -2216,12 +2298,18 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->counters[i] = -1;
}
}
+ if (mlx4_is_bonded(dev))
+ for (i = 1; i < ibdev->num_ports ; ++i)
+ ibdev->counters[i] = ibdev->counters[0];
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ INIT_LIST_HEAD(&ibdev->qp_list);
+ spin_lock_init(&ibdev->reset_flow_resource_lock);
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
ib_num_ports) {
@@ -2237,7 +2325,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
- dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ dev_err(&dev->persist->pdev->dev,
+ "bit map alloc failed\n");
goto err_steer_qp_release;
}
@@ -2535,6 +2624,99 @@ out:
return;
}
+static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
+{
+ struct mlx4_ib_qp *mqp;
+ unsigned long flags_qp;
+ unsigned long flags_cq;
+ struct mlx4_ib_cq *send_mcq, *recv_mcq;
+ struct list_head cq_notify_list;
+ struct mlx4_cq *mcq;
+ unsigned long flags;
+
+ pr_warn("mlx4_ib_handle_catas_error was started\n");
+ INIT_LIST_HEAD(&cq_notify_list);
+
+ /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
+ spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
+
+ list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
+ spin_lock_irqsave(&mqp->sq.lock, flags_qp);
+ if (mqp->sq.tail != mqp->sq.head) {
+ send_mcq = to_mcq(mqp->ibqp.send_cq);
+ spin_lock_irqsave(&send_mcq->lock, flags_cq);
+ if (send_mcq->mcq.comp &&
+ mqp->ibqp.send_cq->comp_handler) {
+ if (!send_mcq->mcq.reset_notify_added) {
+ send_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&send_mcq->mcq.reset_notify,
+ &cq_notify_list);
+ }
+ }
+ spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
+ }
+ spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
+ /* Now, handle the QP's receive queue */
+ spin_lock_irqsave(&mqp->rq.lock, flags_qp);
+ /* no handling is needed for SRQ */
+ if (!mqp->ibqp.srq) {
+ if (mqp->rq.tail != mqp->rq.head) {
+ recv_mcq = to_mcq(mqp->ibqp.recv_cq);
+ spin_lock_irqsave(&recv_mcq->lock, flags_cq);
+ if (recv_mcq->mcq.comp &&
+ mqp->ibqp.recv_cq->comp_handler) {
+ if (!recv_mcq->mcq.reset_notify_added) {
+ recv_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&recv_mcq->mcq.reset_notify,
+ &cq_notify_list);
+ }
+ }
+ spin_unlock_irqrestore(&recv_mcq->lock,
+ flags_cq);
+ }
+ }
+ spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
+ }
+
+ list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
+ mcq->comp(mcq);
+ }
+ spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
+ pr_warn("mlx4_ib_handle_catas_error ended\n");
+}
+
+static void handle_bonded_port_state_event(struct work_struct *work)
+{
+ struct ib_event_work *ew =
+ container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *ibdev = ew->ib_dev;
+ enum ib_port_state bonded_port_state = IB_PORT_NOP;
+ int i;
+ struct ib_event ibev;
+
+ kfree(ew);
+ spin_lock_bh(&ibdev->iboe.lock);
+ for (i = 0; i < MLX4_MAX_PORTS; ++i) {
+ struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
+
+ enum ib_port_state curr_port_state =
+ (netif_running(curr_netdev) &&
+ netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+
+ bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
+ curr_port_state : IB_PORT_ACTIVE;
+ }
+ spin_unlock_bh(&ibdev->iboe.lock);
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = 1;
+ ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+ ib_dispatch_event(&ibev);
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
@@ -2544,6 +2726,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
struct ib_event_work *ew;
int p = 0;
+ if (mlx4_is_bonded(dev) &&
+ ((event == MLX4_DEV_EVENT_PORT_UP) ||
+ (event == MLX4_DEV_EVENT_PORT_DOWN))) {
+ ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
+ if (!ew)
+ return;
+ INIT_WORK(&ew->work, handle_bonded_port_state_event);
+ ew->ib_dev = ibdev;
+ queue_work(wq, &ew->work);
+ return;
+ }
+
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
@@ -2570,6 +2764,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
+ mlx4_ib_handle_catas_error(ibdev);
break;
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
@@ -2604,7 +2799,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = (u8) p;
+ ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
}
@@ -2613,7 +2808,8 @@ static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event,
- .protocol = MLX4_PROT_IB_IPV6
+ .protocol = MLX4_PROT_IB_IPV6,
+ .flags = MLX4_INTFF_BONDING
};
static int __init mlx4_ib_init(void)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6eb743f65f6..f829fd935b7 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -110,6 +110,9 @@ struct mlx4_ib_cq {
struct mutex resize_mutex;
struct ib_umem *umem;
struct ib_umem *resize_umem;
+ /* List of qps that it serves.*/
+ struct list_head send_qp_list;
+ struct list_head recv_qp_list;
};
struct mlx4_ib_mr {
@@ -134,10 +137,17 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
+#define MAX_REGS_PER_FLOW 2
+
+struct mlx4_flow_reg_id {
+ u64 id;
+ u64 mirror;
+};
+
struct mlx4_ib_flow {
struct ib_flow ibflow;
/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
- u64 reg_id[2];
+ struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW];
};
struct mlx4_ib_wq {
@@ -293,6 +303,9 @@ struct mlx4_ib_qp {
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
u64 reg_id;
+ struct list_head qps_list;
+ struct list_head cq_recv_list;
+ struct list_head cq_send_list;
};
struct mlx4_ib_srq {
@@ -527,6 +540,10 @@ struct mlx4_ib_dev {
struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
/* lock when destroying qp1_proxy and getting netdev events */
struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
+ u8 bond_next_port;
+ /* protect resources needed as part of reset flow */
+ spinlock_t reset_flow_resource_lock;
+ struct list_head qp_list;
};
struct ib_event_work {
@@ -622,6 +639,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
+{
+ dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports;
+
+ return dev->bond_next_port + 1;
+}
+
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index c36ccbd9a64..e0d271782d0 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
if (!mfrpl->ibfrpl.page_list)
goto err_free;
- mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+ mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
+ pdev->dev,
size, &mfrpl->map,
GFP_KERNEL);
if (!mfrpl->mapped_page_list)
@@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
int size = page_list->max_page_list_len * sizeof (u64);
- dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
+ dma_free_coherent(&dev->dev->persist->pdev->dev, size,
+ mfrpl->mapped_page_list,
mfrpl->map);
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index cf000b7ad64..dfc6ca128a7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -40,11 +40,17 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
+static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
+ struct mlx4_ib_cq *recv_cq);
+static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
+ struct mlx4_ib_cq *recv_cq);
+
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
};
@@ -93,17 +99,6 @@ enum {
#ifndef ETH_ALEN
#define ETH_ALEN 6
#endif
-static inline u64 mlx4_mac_to_u64(u8 *addr)
-{
- u64 mac = 0;
- int i;
-
- for (i = 0; i < ETH_ALEN; i++) {
- mac <<= 8;
- mac |= addr[i];
- }
- return mac;
-}
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
@@ -628,6 +623,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct mlx4_ib_sqp *sqp;
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+ struct mlx4_ib_cq *mcq;
+ unsigned long flags;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
@@ -838,6 +835,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->mqp.event = mlx4_ib_qp_event;
if (!*caller_qp)
*caller_qp = qp;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ /* Maintain device to QPs access, needed for further handling
+ * via reset flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling
+ * via reset flow
+ */
+ mcq = to_mcq(init_attr->send_cq);
+ list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
+ mcq = to_mcq(init_attr->recv_cq);
+ list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
+ mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
return 0;
err_qpn:
@@ -896,13 +911,13 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
if (send_cq == recv_cq) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
@@ -912,13 +927,13 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
{
if (send_cq == recv_cq) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
}
@@ -963,6 +978,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
+ unsigned long flags;
if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
@@ -994,8 +1010,13 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
get_cqs(qp, &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(send_cq, recv_cq);
+ /* del from lists under both locks above to protect reset flow paths */
+ list_del(&qp->qps_list);
+ list_del(&qp->cq_send_list);
+ list_del(&qp->cq_recv_list);
if (!is_user) {
__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
@@ -1006,6 +1027,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_remove(dev->dev, &qp->mqp);
mlx4_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
mlx4_qp_free(dev->dev, &qp->mqp);
@@ -1915,6 +1937,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
+ if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
+ if ((ibqp->qp_type == IB_QPT_RC) ||
+ (ibqp->qp_type == IB_QPT_UD) ||
+ (ibqp->qp_type == IB_QPT_UC) ||
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
+ (ibqp->qp_type == IB_QPT_XRC_INI)) {
+ attr->port_num = mlx4_ib_bond_next_port(dev);
+ }
+ } else {
+ /* no sense in changing port_num
+ * when ports are bonded */
+ attr_mask &= ~IB_QP_PORT;
+ }
+ }
+
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->num_ports)) {
pr_debug("qpn 0x%x: invalid port number (%d) specified "
@@ -1965,6 +2003,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
+ attr->port_num = 1;
+
out:
mutex_unlock(&qp->mutex);
return err;
@@ -2609,8 +2650,15 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 uninitialized_var(lso_hdr_sz);
__be32 blh;
int i;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
spin_lock_irqsave(&qp->sq.lock, flags);
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
ind = qp->sq_next_wqe;
@@ -2908,10 +2956,18 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int ind;
int max_gs;
int i;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 62d9285300a..dce5dfe3a70 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -316,8 +316,15 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int i;
+ struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device);
spin_lock_irqsave(&srq->lock, flags);
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
@@ -362,6 +369,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
+out:
spin_unlock_irqrestore(&srq->lock, flags);
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index cb4c66e723b..d10c2b8a5da 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->pdev), max);
+ strlcpy(name, pci_name(dev->dev->persist->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
@@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return 0;
- for (i = 0; i <= device->dev->num_vfs; ++i)
+ for (i = 0; i <= device->dev->persist->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
@@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return;
- for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+ for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {