From 3c86aa70bf677a31b71c8292e349242e26cbc743 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 13 Oct 2010 21:26:51 +0200 Subject: RDMA/cm: Add RDMA CM support for IBoE devices Add support for IBoE device binding and IP --> GID resolution. Path resolving and multicast joining are implemented within cma.c by filling in the responses and running callbacks in the CMA work queue. IP --> GID resolution always yields IPv6 link local addresses; remote GIDs are derived from the destination MAC address of the remote port. Multicast GIDs are always mapped to multicast MACs as is done in IPv6. (IPv4 multicast is enabled by translating IPv4 multicast addresses to IPv6 multicast as described in .) Some helper functions are added to ib_addr.h. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) (limited to 'include/rdma/ib_addr.h') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index fa0d52b8e62..904ffa92fc9 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -40,6 +40,7 @@ #include #include #include +#include struct rdma_addr_client { atomic_t refcount; @@ -63,6 +64,7 @@ struct rdma_dev_addr { unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; + enum rdma_transport_type transport; }; /** @@ -127,9 +129,31 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } +static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +{ + memset(gid->raw, 0, 16); + *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + memcpy(gid->raw + 13, mac + 3, 3); + memcpy(gid->raw + 8, mac, 3); + gid->raw[8] ^= 2; +} + +static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + iboe_mac_to_ll(gid, dev_addr->src_dev_addr); +} + static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); + if (dev_addr->transport == RDMA_TRANSPORT_IB && + dev_addr->dev_type != ARPHRD_INFINIBAND) + iboe_addr_get_sgid(dev_addr, gid); + else + memcpy(gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -147,4 +171,77 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } +static inline enum ib_mtu iboe_get_mtu(int mtu) +{ + /* + * reduce IB headers from effective IBoE MTU. 28 stands for + * atomic header which is the biggest possible header after BTH + */ + mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + + if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) + return IB_MTU_4096; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) + return IB_MTU_2048; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) + return IB_MTU_1024; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) + return IB_MTU_512; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) + return IB_MTU_256; + else + return 0; +} + +static inline int iboe_get_rate(struct net_device *dev) +{ + struct ethtool_cmd cmd; + + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || + dev->ethtool_ops->get_settings(dev, &cmd)) + return IB_RATE_PORT_CURRENT; + + if (cmd.speed >= 40000) + return IB_RATE_40_GBPS; + else if (cmd.speed >= 30000) + return IB_RATE_30_GBPS; + else if (cmd.speed >= 20000) + return IB_RATE_20_GBPS; + else if (cmd.speed >= 10000) + return IB_RATE_10_GBPS; + else + return IB_RATE_PORT_CURRENT; +} + +static inline int rdma_link_local_addr(struct in6_addr *addr) +{ + if (addr->s6_addr32[0] == htonl(0xfe800000) && + addr->s6_addr32[1] == 0) + return 1; + + return 0; +} + +static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) +{ + memcpy(mac, &addr->s6_addr[8], 3); + memcpy(mac + 3, &addr->s6_addr[13], 3); + mac[0] ^= 2; +} + +static inline int rdma_is_multicast_addr(struct in6_addr *addr) +{ + return addr->s6_addr[0] == 0xff; +} + +static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) +{ + int i; + + mac[0] = 0x33; + mac[1] = 0x33; + for (i = 2; i < 6; ++i) + mac[i] = addr->s6_addr[i + 10]; +} + #endif /* IB_ADDR_H */ -- cgit v1.2.3-70-g09d2 From af7bd463761c6abd8ca8d831f9cc0ac19f3b7d4b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:18:59 +0300 Subject: IB/core: Add VLAN support for IBoE Add 802.1q VLAN support to IBoE. The VLAN tag is encoded within the GID derived from a link local address in the following way: GID[11] GID[12] contain the VLAN ID when the GID contains a VLAN. The 3 bits user priority field of the packets are identical to the 3 bits of the SL. In case of rdma_cm apps, the TOS field is used to generate the SL field by doing a shift right of 5 bits effectively taking to 3 MS bits of the TOS field. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 20 +++++++++------- drivers/infiniband/core/ucma.c | 12 ++++++++-- drivers/infiniband/core/ud_header.c | 23 ++++++++++++++++++ drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/mthca/mthca_qp.c | 2 +- include/rdma/ib_addr.h | 43 ++++++++++++++++++++++++++++++---- include/rdma/ib_pack.h | 9 +++++++ 7 files changed, 95 insertions(+), 16 deletions(-) (limited to 'include/rdma/ib_addr.h') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f61bc073848..6884da24fde 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1791,6 +1791,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; + u16 vid; if (src_addr->sin_family != dst_addr->sin_family) return -EINVAL; @@ -1810,14 +1811,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; - iboe_mac_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr); - iboe_mac_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr); - - route->path_rec->hop_limit = 1; - route->path_rec->reversible = 1; - route->path_rec->pkey = cpu_to_be16(0xffff); - route->path_rec->mtu_selector = IB_SA_EQ; - if (addr->dev_addr.bound_dev_if) ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { @@ -1825,6 +1818,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } + vid = rdma_vlan_dev_vlan_id(ndev); + + iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); + iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + route->path_rec->sl = id_priv->tos >> 5; + route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3d3c9264c45..357a766bd22 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -587,13 +587,21 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; + struct net_device *dev; + u16 vid = 0; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - iboe_mac_to_ll((union ib_gid *) &resp->ib_route[0].dgid, - dev_addr->dst_dev_addr); + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, + dev_addr->dst_dev_addr, vid); iboe_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index cb0dd5ae277..bb7e1928082 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include #include +#include #include @@ -103,6 +104,17 @@ static const struct ib_field eth_table[] = { .size_bits = 16 } }; +static const struct ib_field vlan_table[] = { + { STRUCT_FIELD(vlan, tag), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(vlan, type), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 } +}; + static const struct ib_field grh_table[] = { { STRUCT_FIELD(grh, ip_version), .offset_words = 0, @@ -205,6 +217,7 @@ static const struct ib_field deth_table[] = { * @payload_bytes:Length of packet payload * @lrh_present: specify if LRH is present * @eth_present: specify if Eth header is present + * @vlan_present: packet is tagged vlan * @grh_present:GRH flag (if non-zero, GRH will be included) * @immediate_present: specify if immediate data is present * @header:Structure to initialize @@ -212,6 +225,7 @@ static const struct ib_field deth_table[] = { void ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header) @@ -234,6 +248,9 @@ void ib_ud_header_init(int payload_bytes, header->lrh.packet_length = cpu_to_be16(packet_length); } + if (vlan_present) + header->eth.type = cpu_to_be16(ETH_P_8021Q); + if (grh_present) { header->grh.ip_version = 6; header->grh.payload_length = @@ -254,6 +271,7 @@ void ib_ud_header_init(int payload_bytes, header->lrh_present = lrh_present; header->eth_present = eth_present; + header->vlan_present = vlan_present; header->grh_present = grh_present; header->immediate_present = immediate_present; } @@ -312,6 +330,11 @@ int ib_ud_header_pack(struct ib_ud_header *header, &header->eth, buf + len); len += IB_ETH_BYTES; } + if (header->vlan_present) { + ib_pack(vlan_table, ARRAY_SIZE(vlan_table), + &header->vlan, buf + len); + len += IB_VLAN_BYTES; + } if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, buf + len); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 17f60fe6e5b..26964844511 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1291,7 +1291,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); - ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header); + ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 1a1c55fb13f..a34c9d38e82 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1493,7 +1493,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int err; u16 pkey; - ib_ud_header_init(256, /* assume a MAD */ 1, 0, + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 904ffa92fc9..b5fc9f39122 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -129,21 +130,41 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) { memset(gid->raw, 0, 16); *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); - gid->raw[12] = 0xfe; - gid->raw[11] = 0xff; + if (vid < 0x1000) { + gid->raw[12] = vid & 0xff; + gid->raw[11] = vid >> 8; + } else { + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + } memcpy(gid->raw + 13, mac + 3, 3); memcpy(gid->raw + 8, mac, 3); gid->raw[8] ^= 2; } +static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_vlan_id(dev) : 0xffff; +} + static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - iboe_mac_to_ll(gid, dev_addr->src_dev_addr); + struct net_device *dev; + u16 vid = 0xffff; + + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -244,4 +265,18 @@ static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) mac[i] = addr->s6_addr[i + 10]; } +static inline u16 rdma_get_vlan_id(union ib_gid *dgid) +{ + u16 vid; + + vid = dgid->raw[11] << 8 | dgid->raw[12]; + return vid < 0x1000 ? vid : 0xffff; +} + +static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_real_dev(dev) : 0; +} + #endif /* IB_ADDR_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 6b91d8e7a1f..b37fe3b10a9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -38,6 +38,7 @@ enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 @@ -219,11 +220,18 @@ struct ib_unpacked_eth { __be16 type; }; +struct ib_unpacked_vlan { + __be16 tag; + __be16 type; +}; + struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; int eth_present; struct ib_unpacked_eth eth; + int vlan_present; + struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; struct ib_unpacked_bth bth; @@ -245,6 +253,7 @@ void ib_unpack(const struct ib_field *desc, void ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header); -- cgit v1.2.3-70-g09d2