From 4c3eb3ca13966508bcb64f39dcdef48be22f1731 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:19:22 +0300 Subject: IB/mlx4: Add VLAN support for IBoE This patch allows IBoE traffic to be encapsulated in 802.1Q tagged VLAN frames. The VLAN tag is encoded in the GID and derived from it by a simple computation. The netdev notifier callback is modified to catch VLAN device addition/removal and the port's GID table is updated to reflect the change, so that for each netdevice there is an entry in the GID table. When the port's GID table is exhausted, GID entries will not be added. Only children of the main interfaces can add to the GID table; if a VLAN interface is added on another VLAN interface (e.g. "vconfig add eth2.6 8"), then that interfaces will not add an entry to the GID table. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 79 +++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw/mlx4/qp.c') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 26964844511..9a7794ac34c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -37,6 +37,7 @@ #include #include +#include #include @@ -57,10 +58,11 @@ enum { enum { /* * Largest possible UD header: send with GRH and immediate - * data plus 14 bytes for an Ethernet header. (LRH would only - * use 8 bytes, so Ethernet is the biggest case) + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 78, + MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; @@ -879,6 +881,8 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, IB_LINK_LAYER_ETHERNET; u8 mac[6]; int is_mcast; + u16 vlan_tag; + int vidx; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); @@ -907,10 +911,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); - if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + if (!(ah->ah_flags & IB_AH_GRH)) return -1; @@ -922,7 +926,18 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->ackto = MLX4_IB_LINK_TYPE_ETH; /* use index 0 into MAC table for IBoE */ path->grh_mylmc &= 0x80; - } + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } @@ -1277,13 +1292,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; int is_eth; + int is_vlan = 0; int is_grh; + u16 vlan; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1291,7 +1309,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); - ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = @@ -1345,7 +1369,15 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); - sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } } else { sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) @@ -1507,13 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_send_wr *wr, __be16 *vlan) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1616,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1719,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, wr, &vlan); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1797,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + if (be16_to_cpu(vlan) < 0x1000) { + ctrl->ins_vlan = 1 << 6; + ctrl->vlan_tag = vlan; + } + stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1946,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -2009,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } -- cgit v1.2.3-70-g09d2