From 9807a54cd74149988f5d20088bf7a7957c205bfb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 7 Jan 2013 09:54:31 -0800 Subject: linux/openvswitch.h: Make OVSP_LOCAL 32-bit. OVS ports are now 32-bit, so OVSP_LOCAL should be too. (Internally, kernel module still keeps port numbers 16-bit, though.) Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d42e174bd0c..99e6414a40d 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -94,7 +94,7 @@ struct ovs_vport_stats { }; /* Fixed logical ports. */ -#define OVSP_LOCAL ((__u16)0) +#define OVSP_LOCAL ((__u32)0) /* Packet transfer. */ -- cgit v1.2.3-70-g09d2 From 4490108b4a5ada14c7be712260829faecc814ae5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 15 Feb 2013 17:29:22 -0800 Subject: openvswitch: Allow OVS_USERSPACE_ATTR_USERDATA to be variable length. Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be exactly 64 bits long, if it was present. However, 64 bits is not enough space to associate as much information with a flow as would be convenient for some userspace features now under development. This commit generalizes the attribute, allowing it to be any length. This generalization is backward-compatible: if userspace only uses 64-bit attributes, then it will not see any change in behavior. CC: Romain Lenglet Signed-off-by: Ben Pfaff Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 11 ++++++----- net/openvswitch/datapath.c | 11 ++++++----- net/openvswitch/datapath.h | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 99e6414a40d..67d6c7b0358 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -127,7 +127,8 @@ enum ovs_packet_cmd { * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an - * %OVS_USERSPACE_ATTR_USERDATA attribute. + * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content + * specified there. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -137,7 +138,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ + OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ __OVS_PACKET_ATTR_MAX }; @@ -389,13 +390,13 @@ enum ovs_sample_attr { * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION * message should be sent. Required. - * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the - * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA, + * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is + * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. 
*/ - OVS_USERSPACE_ATTR_USERDATA, /* u64 optional user-specified cookie. */ + OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f9d2438e643..96cd5b243d5 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -370,8 +370,8 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, len = sizeof(struct ovs_header); len += nla_total_size(skb->len); len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); + if (upcall_info->userdata) + len += NLA_ALIGN(upcall_info->userdata->nla_len); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { @@ -388,8 +388,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_nest_end(user_skb, nla); if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); + __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_len(upcall_info->userdata), + nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); @@ -544,7 +545,7 @@ static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 031dfbf37c9..9125ad5c5ae 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -119,7 +119,7 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost -- cgit v1.2.3-70-g09d2 From 82dc3c63c692b1e1d59378ecee948ac88e034aad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2013 15:57:22 +0000 Subject: net: introduce NAPI_POLL_WEIGHT Some drivers use a too big NAPI poll weight. This patch adds a NAPI_POLL_WEIGHT default value and issues an error message if a driver attempts to use a bigger weight. Signed-off-by: Eric Dumazet Cc: Eilon Greenstein Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 +++++ net/core/dev.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3d00fa4b31..896eb4985f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1475,6 +1475,11 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) +/* Default NAPI poll() weight + * Device drivers are strongly advised to not use bigger value + */ +#define NAPI_POLL_WEIGHT 64 + /** * netif_napi_add - initialize a napi context * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index a06a7a58dd1..96103894ad6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4057,6 +4057,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; + if (weight > NAPI_POLL_WEIGHT) + pr_err_once("netif_napi_add() called with weight %d on device %s\n", + weight, dev->name); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; -- cgit v1.2.3-70-g09d2 From a947b0a93efa9a25c012aa88848f4cf8d9b41280 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Feb 2013 10:54:54 +0100 Subject: xfrm: allow to avoid copying DSCP during encapsulation By default, the DSCP is copied during encapsulation. Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with different DSCP may get reordered relative to each other in the network and then dropped by the remote IPsec GW if the reordering becomes too big compared to the replay window. It is possible to avoid this copy with netfilter rules, but it's very convenient to be able to configure it for each SA directly. This patch adds a toggle for this purpose. By default, it's not set to maintain backward compatibility. The flags field in struct xfrm_usersa_info is full, hence I add a new attribute.
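
The new XFRMA_SA_EXTRA_FLAGS attribute is a plain u32 bitmap (its policy entry below is NLA_U32). As an illustration only, not part of this patch, here is a minimal sketch of how a userspace keying daemon might append the attribute to an already-built XFRM_MSG_NEWSA request so the SA is created with XFRM_SA_XFLAG_DONT_ENCAP_DSCP set; the helper name and the simplified buffer handling are assumptions, not existing iproute2 code.

#include <string.h>
#include <linux/netlink.h>
#include <linux/xfrm.h>

/* Append XFRMA_SA_EXTRA_FLAGS to an already-built XFRM_MSG_NEWSA request.
 * "nlh" points at the request's struct nlmsghdr, followed by the
 * struct xfrm_usersa_info payload and any attributes added so far.
 * Bounds checking is omitted for brevity.
 */
static void xfrm_req_add_extra_flags(struct nlmsghdr *nlh, __u32 flags)
{
	struct nlattr *nla;

	/* the new attribute starts at the aligned end of the message */
	nla = (struct nlattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));
	nla->nla_type = XFRMA_SA_EXTRA_FLAGS;
	nla->nla_len = NLA_HDRLEN + sizeof(flags);
	memcpy((char *)nla + NLA_HDRLEN, &flags, sizeof(flags));

	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + NLA_ALIGN(nla->nla_len);
}

/* e.g. xfrm_req_add_extra_flags(nlh, XFRM_SA_XFLAG_DONT_ENCAP_DSCP); */

On the kernel side, the flag is simply checked in the tunnel-mode output path when the outer DSCP is chosen, as the xfrm4_mode_tunnel.c and xfrm6_mode_tunnel.c hunks below show.
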
Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + include/uapi/linux/xfrm.h | 3 +++ net/ipv4/ipcomp.c | 1 + net/ipv4/xfrm4_mode_tunnel.c | 8 ++++++-- net/ipv6/xfrm6_mode_tunnel.c | 7 +++++-- net/xfrm/xfrm_state.c | 1 + net/xfrm/xfrm_user.c | 13 +++++++++++++ 7 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 24c8886fd96..ae16531d0d3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -162,6 +162,7 @@ struct xfrm_state { xfrm_address_t saddr; int header_len; int trailer_len; + u32 extra_flags; } props; struct xfrm_lifetime_cfg lft; diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 28e493b5b94..a8cd6a4a297 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -297,6 +297,7 @@ enum xfrm_attr_type_t { XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_SA_EXTRA_FLAGS, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -367,6 +368,8 @@ struct xfrm_usersa_info { #define XFRM_STATE_ESN 128 }; +#define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 + struct xfrm_usersa_id { xfrm_address_t daddr; __be32 spi; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f01d1b1aff7..59cb8c76905 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index fe5189e2e11..eb1dd4d643f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); - /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, XFRM_MODE_SKB_CB(skb)->tos); flags = x->props.flags; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9bf6a74a71d..4770d515c2c 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) sizeof(top_iph->flow_lbl)); top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, dsfield); + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2c341bdaf47..78f66fa9244 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; 
x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fbd9e6cd0fd..204cba192af 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; @@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, copy_to_user_state(x, p); + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } + if (x->coaddr) { ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (ret) @@ -2302,6 +2312,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, }; static struct xfrm_link { @@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size(sizeof(u64)); -- cgit v1.2.3-70-g09d2 From f8bacc210408f7a2a182f184a9fa1475b8a67440 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 23:27:01 +0100 Subject: cfg80211: clean up mesh plink station change API Make the ability to leave the plink_state unchanged not use a magic -1 variable that isn't in the enum, but an explicit change flag; reject invalid plink states or actions and move the needed constants for plink actions to the right header file. Also reject plink_state changes for non-mesh interfaces. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 ++------------- include/uapi/linux/nl80211.h | 20 +++++++++++++++++++- net/mac80211/cfg.c | 14 +++++++++++--- net/wireless/nl80211.c | 29 ++++++++++++++++++++++------- 4 files changed, 54 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d581c6de5d6..9b54574c3df 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -610,23 +610,11 @@ struct cfg80211_ap_settings { bool radar_required; }; -/** - * enum plink_action - actions to perform in mesh peers - * - * @PLINK_ACTION_INVALID: action 0 is reserved - * @PLINK_ACTION_OPEN: start mesh peer link establishment - * @PLINK_ACTION_BLOCK: block traffic from this mesh peer - */ -enum plink_actions { - PLINK_ACTION_INVALID, - PLINK_ACTION_OPEN, - PLINK_ACTION_BLOCK, -}; - /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability + * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. 
@@ -634,6 +622,7 @@ enum plink_actions { enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), + STATION_PARAM_APPLY_PLINK_STATE = BIT(2), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c46bb016f4e..7dcc69f73d2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -884,7 +884,8 @@ enum nl80211_commands { * consisting of a nested array. * * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes). - * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link. + * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link + * (see &enum nl80211_plink_action). * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path * info given for %NL80211_CMD_GET_MPATH, nested attribute described at @@ -3307,6 +3308,23 @@ enum nl80211_plink_state { MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 }; +/** + * enum nl80211_plink_action - actions to perform in mesh peers + * + * @NL80211_PLINK_ACTION_NO_ACTION: perform no action + * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment + * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer + * @NUM_NL80211_PLINK_ACTIONS: number of possible actions + */ +enum plink_actions { + NL80211_PLINK_ACTION_NO_ACTION, + NL80211_PLINK_ACTION_OPEN, + NL80211_PLINK_ACTION_BLOCK, + + NUM_NL80211_PLINK_ACTIONS, +}; + + #define NL80211_KCK_LEN 16 #define NL80211_KEK_LEN 16 #define NL80211_REPLAY_CTR_LEN 8 diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb306814576..ca28405d5f6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1261,7 +1261,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH u32 changed = 0; - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED && + (params->sta_modify_mask & + STATION_PARAM_APPLY_PLINK_STATE)) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1292,12 +1294,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* nothing */ break; } + } else if (params->sta_modify_mask & + STATION_PARAM_APPLY_PLINK_STATE) { + return -EINVAL; } else { switch (params->plink_action) { - case PLINK_ACTION_OPEN: + case NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; - case PLINK_ACTION_BLOCK: + case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d44ab216c0e..9e7ece0e5e5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3412,7 +3412,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = -1; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3451,13 +3450,20 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if 
(params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) { params.plink_state = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (params.plink_state >= NUM_NL80211_PLINK_STATES) + return -EINVAL; + params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; + } if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { enum nl80211_mesh_power_mode pm = nla_get_u32( @@ -3479,6 +3485,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3542,6 +3550,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; /* reject any changes other than AUTHORIZED or WME (for TDLS) */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_WME))) @@ -3553,6 +3563,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.local_pm) return -EINVAL; + if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; @@ -3652,9 +3664,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } if (!rdev->ops->add_station) return -EOPNOTSUPP; -- cgit v1.2.3-70-g09d2 From 2c1aabf33d1832befc5291a14c870cd09dc2182d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 23:33:40 +0100 Subject: cfg80211: constify station parameter pointers All the pointers point right into the skb data and not to anything that would be useful to change, so make them const. 
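
A minimal sketch of what the constness implies for a driver, assuming a hypothetical foo driver (struct foo_sta and foo_sta_apply() are placeholders): since the capability pointers alias message data that is only valid for the duration of the call, anything the driver wants to keep has to be copied, never stored by pointer.

#include <linux/string.h>
#include <net/cfg80211.h>

/* Hypothetical per-station driver state. */
struct foo_sta {
	struct ieee80211_ht_cap ht_capa;
	bool ht;
};

static void foo_sta_apply(struct foo_sta *sta,
			  const struct station_parameters *params)
{
	/* params->ht_capa points into the request; copy it, don't keep it */
	if (params->ht_capa) {
		memcpy(&sta->ht_capa, params->ht_capa, sizeof(sta->ht_capa));
		sta->ht = true;
	}
}
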
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9b54574c3df..7ca321d2b59 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -658,7 +658,7 @@ enum station_parameters_apply_mask { * @ext_capab_len: number of extended capabilities */ struct station_parameters { - u8 *supported_rates; + const u8 *supported_rates; struct net_device *vlan; u32 sta_flags_mask, sta_flags_set; u32 sta_modify_mask; @@ -667,13 +667,13 @@ struct station_parameters { u8 supported_rates_len; u8 plink_action; u8 plink_state; - struct ieee80211_ht_cap *ht_capa; - struct ieee80211_vht_cap *vht_capa; + const struct ieee80211_ht_cap *ht_capa; + const struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; u16 capability; - u8 *ext_capab; + const u8 *ext_capab; u8 ext_capab_len; }; -- cgit v1.2.3-70-g09d2 From 77ee7c891a04c3d254711ddf1bde5d7381339fb3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 00:48:33 +0100 Subject: cfg80211: comprehensively check station changes The station change API isn't being checked properly before drivers are called, and as a result it is difficult to see what should be allowed and what should not. In order to comprehensively check the API parameters, parse everything first, and then have the driver call a function (cfg80211_check_station_change()) with the additional information about the kind of station that is being changed; this allows the function to make better decisions than the old code could. While at it, also add a few checks, particularly in mesh, and clarify the TDLS station lifetime in the documentation. To be able to reduce a few checks, ignore any flag set bits when the mask isn't set; they shouldn't be applied then.
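
A minimal sketch of the calling convention this introduces, using a hypothetical foo driver (struct foo_sta, foo_find_sta() and foo_apply_params() are placeholders): the driver classifies the existing entry, has cfg80211 validate the requested change for that station type, and only applies the parameters if the check passes. The ath6kl and mac80211 hunks below show the real conversions.

#include <net/cfg80211.h>

/* Hypothetical driver state and helpers, for illustration only. */
struct foo_sta {
	bool is_tdls_peer;
};

static struct foo_sta *foo_find_sta(struct net_device *dev, const u8 *mac);
static int foo_apply_params(struct foo_sta *sta,
			    struct station_parameters *params);

static int foo_change_station(struct wiphy *wiphy, struct net_device *dev,
			      u8 *mac, struct station_parameters *params)
{
	struct foo_sta *sta = foo_find_sta(dev, mac);
	enum cfg80211_station_type statype;
	int err;

	if (!sta)
		return -ENOENT;

	/* classify the existing entry before asking cfg80211 to validate;
	 * the right type depends on the interface mode and current state */
	statype = sta->is_tdls_peer ? CFG80211_STA_TDLS_PEER_ACTIVE :
				      CFG80211_STA_AP_CLIENT;

	err = cfg80211_check_station_change(wiphy, params, statype);
	if (err)
		return err;

	return foo_apply_params(sta, params);
}
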
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 8 +- include/net/cfg80211.h | 48 ++++- include/uapi/linux/nl80211.h | 16 +- net/mac80211/cfg.c | 125 ++++++++----- net/wireless/nl80211.c | 288 +++++++++++++++++------------ 5 files changed, 311 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 752ffc4f416..28c413f861a 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2990,13 +2990,15 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); + int err; if (vif->nw_type != AP_NETWORK) return -EOPNOTSUPP; - /* Use this only for authorizing/unauthorizing a station */ - if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EOPNOTSUPP; + err = cfg80211_check_station_change(wiphy, params, + CFG80211_STA_AP_MLME_CLIENT); + if (err) + return err; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED)) return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7ca321d2b59..ed2b08da3b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -677,6 +677,49 @@ struct station_parameters { u8 ext_capab_len; }; +/** + * enum cfg80211_station_type - the type of station being modified + * @CFG80211_STA_AP_CLIENT: client of an AP interface + * @CFG80211_STA_AP_MLME_CLIENT: client of an AP interface that has + * the AP MLME in the device + * @CFG80211_STA_AP_STA: AP station on managed interface + * @CFG80211_STA_IBSS: IBSS station + * @CFG80211_STA_TDLS_PEER_SETUP: TDLS peer on managed interface (dummy entry + * while TDLS setup is in progress, it moves out of this state when + * being marked authorized; use this only if TDLS with external setup is + * supported/used) + * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active + * entry that is operating, has been marked authorized by userspace) + * @CFG80211_STA_MESH_PEER_NONSEC: peer on mesh interface (non-secured) + * @CFG80211_STA_MESH_PEER_SECURE: peer on mesh interface (secured) + */ +enum cfg80211_station_type { + CFG80211_STA_AP_CLIENT, + CFG80211_STA_AP_MLME_CLIENT, + CFG80211_STA_AP_STA, + CFG80211_STA_IBSS, + CFG80211_STA_TDLS_PEER_SETUP, + CFG80211_STA_TDLS_PEER_ACTIVE, + CFG80211_STA_MESH_PEER_NONSEC, + CFG80211_STA_MESH_PEER_SECURE, +}; + +/** + * cfg80211_check_station_change - validate parameter changes + * @wiphy: the wiphy this operates on + * @params: the new parameters for a station + * @statype: the type of station being modified + * + * Utility function for the @change_station driver method. Call this function + * with the appropriate station type looking up the station (and checking that + * it exists). It will verify whether the station change is acceptable, and if + * not will return an error code. Note that it may modify the parameters for + * backward compatibility reasons, so don't use them before calling this. + */ +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype); + /** * enum station_info_flags - station information flags * @@ -1770,9 +1813,8 @@ struct cfg80211_gtk_rekey_data { * @change_station: Modify a given station. 
Note that flags changes are not much * validated in cfg80211, in particular the auth/assoc/authorized flags * might come to the driver in invalid combinations -- make sure to check - * them, also against the existing state! Also, supported_rates changes are - * not checked in station mode -- drivers need to reject (or ignore) them - * for anything but TDLS peers. + * them, also against the existing state! Drivers must call + * cfg80211_check_station_change() to validate the information. * @get_station: get station information for the station identified by @mac * @dump_station: dump station callback -- resume dump at index @idx * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7dcc69f73d2..523ed3d65b4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -36,7 +36,21 @@ * The station is still assumed to belong to the AP interface it was added * to. * - * TODO: need more info? + * Station handling varies per interface type and depending on the driver's + * capabilities. + * + * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS + * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows: + * - a setup station entry is added, not yet authorized, without any rate + * or capability information, this just exists to avoid race conditions + * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid + * to add rate and capability information to the station and at the same + * time mark it authorized. + * - %NL80211_TDLS_ENABLE_LINK is then used + * - after this, the only valid operation is to remove it by tearing down + * the TDLS link (%NL80211_TDLS_DISABLE_LINK) + * + * TODO: need more info for other interface types */ /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ca28405d5f6..c115f82c037 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1177,6 +1177,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); + } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* + * TDLS -- everything follows authorized, but + * only becoming authorized is possible, not + * going back + */ + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + } } ret = sta_apply_auth_flags(local, sta, mask, set); @@ -1261,9 +1273,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH u32 changed = 0; - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED && - (params->sta_modify_mask & - STATION_PARAM_APPLY_PLINK_STATE)) { + + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1294,21 +1305,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* nothing */ break; } - } else if (params->sta_modify_mask & - STATION_PARAM_APPLY_PLINK_STATE) { - return -EINVAL; - } else { - switch (params->plink_action) { - case NL80211_PLINK_ACTION_NO_ACTION: - /* nothing */ - break; - case NL80211_PLINK_ACTION_OPEN: - changed |= mesh_plink_open(sta); - break; - case NL80211_PLINK_ACTION_BLOCK: - changed |= mesh_plink_block(sta); - break; - } + } + + switch (params->plink_action) { + case 
NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: + changed |= mesh_plink_open(sta); + break; + case NL80211_PLINK_ACTION_BLOCK: + changed |= mesh_plink_block(sta); + break; } if (params->local_pm) @@ -1354,8 +1362,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * defaults -- if userspace wants something else we'll * change it accordingly in sta_apply_parameters() */ - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } err = sta_apply_parameters(local, sta, params); if (err) { @@ -1364,8 +1374,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } /* - * for TDLS, rate control should be initialized only when supported - * rates are known. + * for TDLS, rate control should be initialized only when + * rates are known and station is marked authorized */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) rate_control_rate_init(sta); @@ -1402,50 +1412,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, + struct net_device *dev, u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; + enum cfg80211_station_type statype; int err; mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (!sta) { - mutex_unlock(&local->sta_mtx); - return -ENOENT; + err = -ENOENT; + goto out_err; } - /* in station mode, some updates are only valid with TDLS */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - (params->supported_rates || params->ht_capa || params->vht_capa || - params->sta_modify_mask || - (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && - !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; + switch (sdata->vif.type) { + case NL80211_IFTYPE_MESH_POINT: + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + statype = CFG80211_STA_MESH_PEER_SECURE; + else + statype = CFG80211_STA_MESH_PEER_NONSEC; + break; + case NL80211_IFTYPE_ADHOC: + statype = CFG80211_STA_IBSS; + break; + case NL80211_IFTYPE_STATION: + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + statype = CFG80211_STA_AP_STA; + break; + } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + statype = CFG80211_STA_TDLS_PEER_ACTIVE; + else + statype = CFG80211_STA_TDLS_PEER_SETUP; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + statype = CFG80211_STA_AP_CLIENT; + break; + default: + err = -EOPNOTSUPP; + goto out_err; } + err = cfg80211_check_station_change(wiphy, params, statype); + if (err) + goto out_err; + if (params->vlan && params->vlan != sta->sdata->dev) { bool prev_4addr = false; bool new_4addr = false; vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && - vlansdata->vif.type != NL80211_IFTYPE_AP) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; - } - if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) { - mutex_unlock(&local->sta_mtx); - return -EBUSY; + err = -EBUSY; + goto out_err; } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); @@ -1472,12 +1499,12 
@@ static int ieee80211_change_station(struct wiphy *wiphy, } err = sta_apply_parameters(local, sta, params); - if (err) { - mutex_unlock(&local->sta_mtx); - return err; - } + if (err) + goto out_err; - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) + /* When peer becomes authorized, init rate control as well */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED)) rate_control_rate_init(sta); mutex_unlock(&local->sta_mtx); @@ -1487,7 +1514,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, ieee80211_recalc_ps(local, -1); ieee80211_recalc_ps_vif(sdata); } + return 0; +out_err: + mutex_unlock(&local->sta_mtx); + return err; } #ifdef CONFIG_MAC80211_MESH diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e7c10420da..83151a50e5a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2967,6 +2967,7 @@ static int parse_station_flags(struct genl_info *info, sta_flags = nla_data(nla); params->sta_flags_mask = sta_flags->mask; params->sta_flags_set = sta_flags->set; + params->sta_flags_set &= params->sta_flags_mask; if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; @@ -3320,6 +3321,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype) +{ + if (params->listen_interval != -1) + return -EINVAL; + if (params->aid) + return -EINVAL; + + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + + switch (statype) { + case CFG80211_STA_MESH_PEER_NONSEC: + case CFG80211_STA_MESH_PEER_SECURE: + /* + * No ignoring the TDLS flag here -- the userspace mesh + * code doesn't have the bug of including TDLS in the + * mask everywhere. + */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + case CFG80211_STA_TDLS_PEER_ACTIVE: + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) + return -EINVAL; + /* ignore since it can't change */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + default: + /* disallow mesh-specific things */ + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + if (params->local_pm) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP && + statype != CFG80211_STA_TDLS_PEER_ACTIVE) { + /* TDLS can't be set, ... */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* + * ... but don't bother the driver with it. This works around + * a hostapd/wpa_supplicant issue -- it always includes the + * TLDS_PEER flag in the mask even for AP mode. 
+ */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP) { + /* reject other things that can't change */ + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) + return -EINVAL; + if (params->supported_rates) + return -EINVAL; + if (params->ext_capab || params->ht_capa || params->vht_capa) + return -EINVAL; + } + + if (statype != CFG80211_STA_AP_CLIENT) { + if (params->vlan) + return -EINVAL; + } + + switch (statype) { + case CFG80211_STA_AP_MLME_CLIENT: + /* Use this only for authorizing/unauthorizing a station */ + if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EOPNOTSUPP; + break; + case CFG80211_STA_AP_CLIENT: + /* accept only the listed bits */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* but authenticated/associated only if driver handles it */ + if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params->sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + break; + case CFG80211_STA_IBSS: + case CFG80211_STA_AP_STA: + /* reject any changes other than AUTHORIZED */ + if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + /* reject any changes other than AUTHORIZED or WME */ + if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + /* force (at least) rates when authorizing */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && + !params->supported_rates) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_ACTIVE: + /* reject any changes */ + return -EINVAL; + case CFG80211_STA_MESH_PEER_NONSEC: + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + break; + case CFG80211_STA_MESH_PEER_SECURE: + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + break; + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_station_change); + /* * Get vlan interface making sure it is running and on the right wiphy. 
*/ @@ -3342,6 +3473,13 @@ static struct net_device *get_vlan(struct genl_info *info, goto error; } + if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + ret = -EINVAL; + goto error; + } + if (!netif_running(v)) { ret = -ENETDOWN; goto error; @@ -3410,15 +3548,18 @@ static int nl80211_set_station_tdls(struct genl_info *info, static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; struct net_device *dev = info->user_ptr[1]; struct station_parameters params; - u8 *mac_addr = NULL; + u8 *mac_addr; + int err; memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; + if (!rdev->ops->change_station) + return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3450,9 +3591,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; - if (!rdev->ops->change_station) - return -EOPNOTSUPP; - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; @@ -3482,133 +3620,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.local_pm = pm; } + /* Include parameters for TDLS peer (will check later) */ + err = nl80211_set_station_tdls(info, ¶ms); + if (err) + return err; + + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - /* disallow mesh-specific things */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - - /* TDLS can't be set, ... */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) - return -EINVAL; - /* - * ... but don't bother the driver with it. This works around - * a hostapd/wpa_supplicant issue -- it always includes the - * TLDS_PEER flag in the mask even for AP mode. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - - /* accept only the listed bits */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED) | - BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | - BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP))) - return -EINVAL; - - /* but authenticated/associated only if driver handles it */ - if (!(rdev->wiphy.features & - NL80211_FEATURE_FULL_AP_CLIENT_STATE) && - params.sta_flags_mask & - (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED))) - return -EINVAL; - - /* reject other things that can't change */ - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - - /* must be last in here for error handling */ - params.vlan = get_vlan(info, rdev); - if (IS_ERR(params.vlan)) - return PTR_ERR(params.vlan); - break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - /* - * Don't allow userspace to change the TDLS_PEER flag, - * but silently ignore attempts to change it since we - * don't have state here to verify that it doesn't try - * to change the flag. 
- */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* Include parameters for TDLS peer (driver will check) */ - err = nl80211_set_station_tdls(info, ¶ms); - if (err) - return err; - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - /* reject any changes other than AUTHORIZED or WME (for TDLS) */ - if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_WME))) - return -EINVAL; - break; case NL80211_IFTYPE_ADHOC: - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* reject any changes other than AUTHORIZED */ - if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) - return -EINVAL; - break; case NL80211_IFTYPE_MESH_POINT: - /* disallow things mesh doesn't support */ - if (params.vlan) - return -EINVAL; - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* - * No special handling for TDLS here -- the userspace - * mesh code doesn't have this bug. - */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_MFP) | - BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EINVAL; break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out_put_vlan; } - /* be aware of params.vlan when changing code here */ - + /* driver will call cfg80211_check_station_change() */ err = rdev_change_station(rdev, dev, mac_addr, ¶ms); + out_put_vlan: if (params.vlan) dev_put(params.vlan); @@ -3687,6 +3725,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3730,8 +3771,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ignore uAPSD data */ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; - /* associated is disallowed */ - if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + /* these are disallowed */ + if (params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED))) return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) @@ -3742,6 +3785,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ... with external setup is supported */ if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) return -EOPNOTSUPP; + /* + * Older wpa_supplicant versions always mark the TDLS peer + * as authorized, but it shouldn't yet be. 
+ */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EOPNOTSUPP; -- cgit v1.2.3-70-g09d2 From 3713b4e364effef4b170c97d54528b1cdb16aa6b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 16:19:38 +0100 Subject: nl80211: allow splitting wiphy information in dumps The per-wiphy information is getting large, to the point where with more than the typical number of channels it's too large and overflows, and userspace can't get any of the information at all. To address this (in a way that doesn't require making all messages bigger) allow userspace to specify that it can deal with wiphy information split across multiple parts of the dump, and if it can split up the data. This also splits up each channel separately so an arbitrary number of channels can be supported. Additionally, since GET_WIPHY has the same problem, add support for filtering the wiphy dump and get information for a single wiphy only, this allows userspace apps to use dump in this case to retrieve all data from a single device. As userspace needs to know if all this this is supported, add a global nl80211 feature set and include a bit for this behaviour in it. Cc: Dennis H Jensen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++ net/wireless/nl80211.c | 933 ++++++++++++++++++++++++++----------------- 2 files changed, 599 insertions(+), 362 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 523ed3d65b4..9844c10a299 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -625,6 +625,10 @@ * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the * event. * + * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features, + * i.e. features for the nl80211 protocol rather than device features. + * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -779,6 +783,8 @@ enum nl80211_commands { NL80211_CMD_RADAR_DETECT, + NL80211_CMD_GET_PROTOCOL_FEATURES, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1383,6 +1389,13 @@ enum nl80211_commands { * advertised to the driver, e.g., to enable TDLS off channel operations * and PU-APSD. * + * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see + * &enum nl80211_protocol_features, the attribute is a u32. + * + * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports + * receiving the data for a single wiphy split across multiple + * messages, given with wiphy dump message + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1669,6 +1682,9 @@ enum nl80211_attrs { NL80211_ATTR_STA_CAPABILITY, NL80211_ATTR_STA_EXT_CAPABILITY, + NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_ATTR_SPLIT_WIPHY_DUMP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3619,4 +3635,16 @@ enum nl80211_dfs_state { NL80211_DFS_AVAILABLE, }; +/** + * enum enum nl80211_protocol_features - nl80211 protocol features + * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting + * wiphy dumps (if requested by the application with the attribute + * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the + * wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or + * %NL80211_ATTR_WDEV. 
+ */ +enum nl80211_protocol_features { + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 83151a50e5a..f187a920ec7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -370,6 +370,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, + [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, }; /* policy for the key attributes */ @@ -892,412 +893,545 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, - struct cfg80211_registered_device *dev) +#ifdef CONFIG_PM +static int nl80211_send_wowlan(struct sk_buff *msg, + struct cfg80211_registered_device *dev) { - void *hdr; - struct nlattr *nl_bands, *nl_band; - struct nlattr *nl_freqs, *nl_freq; - struct nlattr *nl_rates, *nl_rate; - struct nlattr *nl_cmds; - enum ieee80211_band band; - struct ieee80211_channel *chan; - struct ieee80211_rate *rate; - int i; - const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + struct nlattr *nl_wowlan; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); - if (!hdr) - return -1; + if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + return 0; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || - nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) || - nla_put_u32(msg, NL80211_ATTR_GENERATION, - cfg80211_rdev_list_generation) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || - nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) - goto nla_put_failure; + nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + if (!nl_wowlan) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && - nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && - nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && - nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) - goto nla_put_failure; + if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || + 
((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) + return -ENOBUFS; - if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) - goto nla_put_failure; + if (dev->wiphy.wowlan.n_patterns) { + struct nl80211_wowlan_pattern_support pat = { + .max_patterns = dev->wiphy.wowlan.n_patterns, + .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + }; - if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) - goto nla_put_failure; + if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + sizeof(pat), &pat)) + return -ENOBUFS; + } - if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && - nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) - goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) - goto nla_put_failure; + return 0; +} +#endif - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && - nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) - goto nla_put_failure; +static int nl80211_send_band_rateinfo(struct sk_buff *msg, + struct ieee80211_supported_band *sband) +{ + struct nlattr *nl_rates, *nl_rate; + struct ieee80211_rate *rate; + int i; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { - u32 tx_ant = 0, rx_ant = 0; - int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); - if (!res) { - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, - tx_ant) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, - rx_ant)) - goto nla_put_failure; - } - } + /* add HT info */ + if (sband->ht_cap.ht_supported && + (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, + sizeof(sband->ht_cap.mcs), + &sband->ht_cap.mcs) || + nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, + sband->ht_cap.cap) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + sband->ht_cap.ampdu_factor) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + sband->ht_cap.ampdu_density))) + return -ENOBUFS; - if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) - goto nla_put_failure; + /* add VHT info */ + if (sband->vht_cap.vht_supported && + (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, + sizeof(sband->vht_cap.vht_mcs), + &sband->vht_cap.vht_mcs) || + nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, + sband->vht_cap.cap))) + return -ENOBUFS; - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); - if (!nl_bands) - goto 
nla_put_failure; + /* add bitrates */ + nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + if (!nl_rates) + return -ENOBUFS; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!dev->wiphy.bands[band]) - continue; + for (i = 0; i < sband->n_bitrates; i++) { + nl_rate = nla_nest_start(msg, i); + if (!nl_rate) + return -ENOBUFS; - nl_band = nla_nest_start(msg, band); - if (!nl_band) - goto nla_put_failure; + rate = &sband->bitrates[i]; + if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, + rate->bitrate)) + return -ENOBUFS; + if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && + nla_put_flag(msg, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) + return -ENOBUFS; - /* add HT info */ - if (dev->wiphy.bands[band]->ht_cap.ht_supported && - (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, - sizeof(dev->wiphy.bands[band]->ht_cap.mcs), - &dev->wiphy.bands[band]->ht_cap.mcs) || - nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, - dev->wiphy.bands[band]->ht_cap.cap) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, - dev->wiphy.bands[band]->ht_cap.ampdu_factor) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, - dev->wiphy.bands[band]->ht_cap.ampdu_density))) - goto nla_put_failure; + nla_nest_end(msg, nl_rate); + } - /* add VHT info */ - if (dev->wiphy.bands[band]->vht_cap.vht_supported && - (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, - sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), - &dev->wiphy.bands[band]->vht_cap.vht_mcs) || - nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, - dev->wiphy.bands[band]->vht_cap.cap))) - goto nla_put_failure; + nla_nest_end(msg, nl_rates); - /* add frequencies */ - nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); - if (!nl_freqs) - goto nla_put_failure; + return 0; +} - for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { - nl_freq = nla_nest_start(msg, i); - if (!nl_freq) - goto nla_put_failure; +static int +nl80211_send_mgmt_stypes(struct sk_buff *msg, + const struct ieee80211_txrx_stypes *mgmt_stypes) +{ + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + int i; - chan = &dev->wiphy.bands[band]->channels[i]; + if (!mgmt_stypes) + return 0; - if (nl80211_msg_put_channel(msg, chan)) - goto nla_put_failure; + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; - nla_nest_end(msg, nl_freq); + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; } + nla_nest_end(msg, nl_ftypes); + } - nla_nest_end(msg, nl_freqs); + nla_nest_end(msg, nl_ifs); - /* add bitrates */ - nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); - if (!nl_rates) - goto nla_put_failure; + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; - for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { - nl_rate = nla_nest_start(msg, i); - if (!nl_rate) - goto nla_put_failure; + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); - rate = 
&dev->wiphy.bands[band]->bitrates[i]; - if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, - rate->bitrate)) - goto nla_put_failure; - if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && - nla_put_flag(msg, - NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) - goto nla_put_failure; + return 0; +} - nla_nest_end(msg, nl_rate); - } +static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, + struct sk_buff *msg, u32 portid, u32 seq, + int flags, bool split, long *split_start, + long *band_start, long *chan_start) +{ + void *hdr; + struct nlattr *nl_bands, *nl_band; + struct nlattr *nl_freqs, *nl_freq; + struct nlattr *nl_cmds; + enum ieee80211_band band; + struct ieee80211_channel *chan; + int i; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; + long start = 0, start_chan = 0, start_band = 0; - nla_nest_end(msg, nl_rates); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + if (!hdr) + return -ENOBUFS; - nla_nest_end(msg, nl_band); + /* allow always using the variables */ + if (!split) { + split_start = &start; + band_start = &start_band; + chan_start = &start_chan; } - nla_nest_end(msg, nl_bands); - nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); - if (!nl_cmds) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, + wiphy_name(&dev->wiphy)) || + nla_put_u32(msg, NL80211_ATTR_GENERATION, + cfg80211_rdev_list_generation)) + goto nla_put_failure; + + switch (*split_start) { + case 0: + if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, + dev->wiphy.retry_short) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, + dev->wiphy.retry_long) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + dev->wiphy.frag_threshold) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, + dev->wiphy.rts_threshold) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + dev->wiphy.max_scan_ssids) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, + dev->wiphy.max_sched_scan_ssids) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, + dev->wiphy.max_scan_ie_len) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, + dev->wiphy.max_sched_scan_ie_len) || + nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, + dev->wiphy.max_match_sets)) + goto nla_put_failure; - i = 0; -#define CMD(op, n) \ - do { \ - if (dev->ops->op) { \ - i++; \ - if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ - goto nla_put_failure; \ - } \ - } while (0) - - CMD(add_virtual_intf, NEW_INTERFACE); - CMD(change_virtual_intf, SET_INTERFACE); - CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); - CMD(add_station, NEW_STATION); - CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_config, SET_MESH_CONFIG); - CMD(change_bss, SET_BSS); - CMD(auth, AUTHENTICATE); - CMD(assoc, ASSOCIATE); - CMD(deauth, DEAUTHENTICATE); - CMD(disassoc, DISASSOCIATE); - CMD(join_ibss, JOIN_IBSS); - CMD(join_mesh, JOIN_MESH); - CMD(set_pmksa, SET_PMKSA); - CMD(del_pmksa, DEL_PMKSA); - CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) - CMD(remain_on_channel, REMAIN_ON_CHANNEL); - CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(mgmt_tx, FRAME); - CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) + if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + nla_put_flag(msg, 
NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; - } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; - } - CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { - CMD(tdls_mgmt, TDLS_MGMT); - CMD(tdls_oper, TDLS_OPER); - } - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - CMD(sched_scan_start, START_SCHED_SCAN); - CMD(probe_client, PROBE_CLIENT); - CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; - } - CMD(start_p2p_device, START_P2P_DEVICE); - CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif + (*split_start)++; + if (split) + break; + case 1: + if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, + sizeof(u32) * dev->wiphy.n_cipher_suites, + dev->wiphy.cipher_suites)) + goto nla_put_failure; -#undef CMD + if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, + dev->wiphy.max_num_pmkids)) + goto nla_put_failure; - if (dev->ops->connect || dev->ops->auth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) + if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; - } - if (dev->ops->disconnect || dev->ops->deauth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + dev->wiphy.available_antennas_tx) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + dev->wiphy.available_antennas_rx)) goto nla_put_failure; - } - nla_nest_end(msg, nl_cmds); + if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, + dev->wiphy.probe_resp_offload)) + goto nla_put_failure; - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && - nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) - goto nla_put_failure; + if ((dev->wiphy.available_antennas_tx || + dev->wiphy.available_antennas_rx) && + dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = rdev_get_antenna(dev, &tx_ant, &rx_ant); + if (!res) { + if (nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_TX, + tx_ant) || + nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_RX, + rx_ant)) + goto nla_put_failure; + } + } - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && - nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 2: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, + dev->wiphy.interface_modes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 3: + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); 
+ if (!nl_bands) + goto nla_put_failure; - if (mgmt_stypes) { - u16 stypes; - struct nlattr *nl_ftypes, *nl_ifs; - enum nl80211_iftype ift; + for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); - if (!nl_ifs) - goto nla_put_failure; + sband = dev->wiphy.bands[band]; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + if (!sband) + continue; + + nl_band = nla_nest_start(msg, band); + if (!nl_band) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].tx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) + + switch (*chan_start) { + case 0: + if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - stypes >>= 1; - i++; + (*chan_start)++; + if (split) + break; + default: + /* add frequencies */ + nl_freqs = nla_nest_start( + msg, NL80211_BAND_ATTR_FREQS); + if (!nl_freqs) + goto nla_put_failure; + + for (i = *chan_start - 1; + i < sband->n_channels; + i++) { + nl_freq = nla_nest_start(msg, i); + if (!nl_freq) + goto nla_put_failure; + + chan = &sband->channels[i]; + + if (nl80211_msg_put_channel(msg, chan)) + goto nla_put_failure; + + nla_nest_end(msg, nl_freq); + if (split) + break; + } + if (i < sband->n_channels) + *chan_start = i + 2; + else + *chan_start = 0; + nla_nest_end(msg, nl_freqs); + } + + nla_nest_end(msg, nl_band); + + if (split) { + /* start again here */ + if (*chan_start) + band--; + break; } - nla_nest_end(msg, nl_ftypes); } + nla_nest_end(msg, nl_bands); - nla_nest_end(msg, nl_ifs); + if (band < IEEE80211_NUM_BANDS) + *band_start = band + 1; + else + *band_start = 0; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); - if (!nl_ifs) + /* if bands & channels are done, continue outside */ + if (*band_start == 0 && *chan_start == 0) + (*split_start)++; + if (split) + break; + case 4: + nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + if (!nl_cmds) goto nla_put_failure; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + i = 0; +#define CMD(op, n) \ + do { \ + if (dev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(start_ap, START_AP); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(update_mesh_config, SET_MESH_CONFIG); + CMD(change_bss, SET_BSS); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); + if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].rx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) - goto nla_put_failure; - stypes >>= 1; - i++; - } - nla_nest_end(msg, nl_ftypes); } - nla_nest_end(msg, nl_ifs); - } + if 
(dev->ops->set_monitor_channel || dev->ops->start_ap || + dev->ops->join_mesh) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } + CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + goto nla_put_failure; + } + CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_PM - if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { - struct nlattr *nl_wowlan; +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif - nl_wowlan = nla_nest_start(msg, - NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); - if (!nl_wowlan) - goto nla_put_failure; +#undef CMD - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) - goto nla_put_failure; - if (dev->wiphy.wowlan.n_patterns) { - struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = - dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = - dev->wiphy.wowlan.pattern_max_len, - .max_pkt_offset = - dev->wiphy.wowlan.max_pkt_offset, - }; - if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, - sizeof(pat), &pat)) + if (dev->ops->connect || dev->ops->auth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - nla_nest_end(msg, nl_wowlan); - } + if (dev->ops->disconnect || dev->ops->deauth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + goto nla_put_failure; + } + + nla_nest_end(msg, nl_cmds); + (*split_start)++; + if (split) + break; + case 5: + if (dev->ops->remain_on_channel && + (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + nla_put_u32(msg, + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + dev->wiphy.max_remain_on_channel_duration)) + goto nla_put_failure; + + if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) + goto nla_put_failure; + + if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 6: +#ifdef CONFIG_PM + if (nl80211_send_wowlan(msg, dev)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; +#else + (*split_start)++; #endif + case 7: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, + dev->wiphy.software_iftypes)) + goto nla_put_failure; - if 
(nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) - goto nla_put_failure; + if (nl80211_put_iface_combinations(&dev->wiphy, msg)) + goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 8: + if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, + dev->wiphy.ap_sme_capa)) + goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && - nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, + dev->wiphy.features)) + goto nla_put_failure; - if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, - dev->wiphy.features)) - goto nla_put_failure; + if (dev->wiphy.ht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, + sizeof(*dev->wiphy.ht_capa_mod_mask), + dev->wiphy.ht_capa_mod_mask)) + goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && - nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) - goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; - if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && - dev->wiphy.max_acl_mac_addrs && - nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, - dev->wiphy.max_acl_mac_addrs)) - goto nla_put_failure; + /* + * Any information below this point is only available to + * applications that can deal with it being split. This + * helps ensure that newly added capabilities don't break + * older tools by overrunning their buffers. + * + * We still increment split_start so that in the split + * case we'll continue with more data in the next round, + * but break unconditionally so unsplit data stops here. 
+ */ + (*split_start)++; + break; + case 9: + /* placeholder */ + /* done */ + *split_start = 0; + break; + } return genlmsg_end(msg, hdr); nla_put_failure: @@ -1310,39 +1444,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, ret; int start = cb->args[0]; struct cfg80211_registered_device *dev; + s64 filter_wiphy = -1; + bool split = false; + struct nlattr **tb = nl80211_fam.attrbuf; + int res; mutex_lock(&cfg80211_mutex); + res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + if (res == 0) { + split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) { + mutex_unlock(&cfg80211_mutex); + return -ENODEV; + } + if (netdev->ieee80211_ptr) { + dev = wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + filter_wiphy = dev->wiphy_idx; + } + dev_put(netdev); + } + } + list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; if (++idx <= start) continue; - ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev); - if (ret < 0) { - /* - * If sending the wiphy data didn't fit (ENOBUFS or - * EMSGSIZE returned), this SKB is still empty (so - * it's not too big because another wiphy dataset is - * already in the skb) and we've not tried to adjust - * the dump allocation yet ... then adjust the alloc - * size to be bigger, and return 1 but with the empty - * skb. This results in an empty message being RX'ed - * in userspace, but that is ignored. - * - * We can then retry with the larger buffer. - */ - if ((ret == -ENOBUFS || ret == -EMSGSIZE) && - !skb->len && - cb->min_dump_alloc < 4096) { - cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); - return 1; + if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + continue; + /* attempt to fit multiple wiphy data chunks into the skb */ + do { + ret = nl80211_send_wiphy(dev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + split, &cb->args[1], + &cb->args[2], + &cb->args[3]); + if (ret < 0) { + /* + * If sending the wiphy data didn't fit (ENOBUFS + * or EMSGSIZE returned), this SKB is still + * empty (so it's not too big because another + * wiphy dataset is already in the skb) and + * we've not tried to adjust the dump allocation + * yet ... then adjust the alloc size to be + * bigger, and return 1 but with the empty skb. + * This results in an empty message being RX'ed + * in userspace, but that is ignored. + * + * We can then retry with the larger buffer. 
+ */ + if ((ret == -ENOBUFS || ret == -EMSGSIZE) && + !skb->len && + cb->min_dump_alloc < 4096) { + cb->min_dump_alloc = 4096; + mutex_unlock(&cfg80211_mutex); + return 1; + } + idx--; + break; } - idx--; - break; - } + } while (cb->args[1] > 0); + break; } mutex_unlock(&cfg80211_mutex); @@ -1360,7 +1535,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -7821,6 +7997,33 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_get_protocol_features(struct sk_buff *skb, + struct genl_info *info) +{ + void *hdr; + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_PROTOCOL_FEATURES); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + + nla_put_failure: + kfree_skb(msg); + return -ENOBUFS; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8497,6 +8700,11 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, + .doit = nl80211_get_protocol_features, + .policy = nl80211_policy, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -8524,7 +8732,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) if (!msg) return; - if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return; } -- cgit v1.2.3-70-g09d2 From 9a886586c82aa02cb49f8c85e961595716884545 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 19:25:00 +0100 Subject: wireless: move sequence number arithmetic to ieee80211.h Move the sequence number arithmetic code from mac80211 to ieee80211.h so others can use it. Also rename the functions from _seq to _sn, they operate on the sequence number, not the sequence_control field. Also move macros to convert the sequence control to/from the sequence number value from various drivers. 
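A minimal usage sketch of the renamed helpers this patch introduces in include/linux/ieee80211.h (the function and variable names here are hypothetical, illustrative only, and not part of the patch):

	/* Sketch: extract the 12-bit sequence number from a received frame's
	 * sequence control field and compare it against a reorder window
	 * head; all arithmetic wraps modulo IEEE80211_SN_MODULO (4096).
	 */
	static bool example_frame_is_old(struct ieee80211_hdr *hdr, u16 head_sn)
	{
		u16 sn = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));

		return ieee80211_sn_less(sn, head_sn);
	}

Advancing such a window head would use ieee80211_sn_inc()/ieee80211_sn_add(), which mask the result the same way.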
Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/3945.h | 4 --- drivers/net/wireless/iwlegacy/4965-mac.c | 13 ++++---- drivers/net/wireless/iwlegacy/common.h | 4 --- drivers/net/wireless/iwlwifi/dvm/tx.c | 11 ++++--- drivers/net/wireless/iwlwifi/iwl-trans.h | 3 -- drivers/net/wireless/iwlwifi/mvm/sta.c | 4 +-- drivers/net/wireless/iwlwifi/mvm/tx.c | 2 +- drivers/net/wireless/iwlwifi/pcie/tx.c | 2 +- drivers/net/wireless/rtlwifi/wifi.h | 3 -- include/linux/ieee80211.h | 28 +++++++++++++++++ net/mac80211/rx.c | 53 +++++++++++++------------------- 11 files changed, 66 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlegacy/3945.h b/drivers/net/wireless/iwlegacy/3945.h index 1d45075e0d5..9a8703def0b 100644 --- a/drivers/net/wireless/iwlegacy/3945.h +++ b/drivers/net/wireless/iwlegacy/3945.h @@ -150,10 +150,6 @@ struct il3945_frame { struct list_head list; }; -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) - #define SUP_RATE_11A_MAX_NUM_CHANNELS 8 #define SUP_RATE_11B_MAX_NUM_CHANNELS 4 #define SUP_RATE_11G_MAX_NUM_CHANNELS 12 diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 7941eb3a016..c092fcbbe96 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -2258,7 +2258,7 @@ il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif, spin_lock_irqsave(&il->sta_lock, flags); tid_data = &il->stations[sta_id].tid[tid]; - *ssn = SEQ_TO_SN(tid_data->seq_number); + *ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->agg.txq_id = txq_id; il_set_swq_id(&il->txq[txq_id], il4965_get_ac_from_tid(tid), txq_id); spin_unlock_irqrestore(&il->sta_lock, flags); @@ -2408,7 +2408,7 @@ il4965_txq_check_empty(struct il_priv *il, int sta_id, u8 tid, int txq_id) /* aggregated HW queue */ if (txq_id == tid_data->agg.txq_id && q->read_ptr == q->write_ptr) { - u16 ssn = SEQ_TO_SN(tid_data->seq_number); + u16 ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); int tx_fifo = il4965_get_fifo_from_tid(tid); D_HT("HW queue empty: continue DELBA flow\n"); il4965_txq_agg_disable(il, txq_id, ssn, tx_fifo); @@ -2627,7 +2627,8 @@ il4965_get_ra_sta_id(struct il_priv *il, struct ieee80211_hdr *hdr) static inline u32 il4965_get_scd_ssn(struct il4965_tx_resp *tx_resp) { - return le32_to_cpup(&tx_resp->u.status + tx_resp->frame_count) & MAX_SN; + return le32_to_cpup(&tx_resp->u.status + + tx_resp->frame_count) & IEEE80211_MAX_SN; } static inline u32 @@ -2717,15 +2718,15 @@ il4965_tx_status_reply_tx(struct il_priv *il, struct il_ht_agg *agg, hdr = (struct ieee80211_hdr *) skb->data; sc = le16_to_cpu(hdr->seq_ctrl); - if (idx != (SEQ_TO_SN(sc) & 0xff)) { + if (idx != (IEEE80211_SEQ_TO_SN(sc) & 0xff)) { IL_ERR("BUG_ON idx doesn't match seq control" " idx=%d, seq_idx=%d, seq=%d\n", idx, - SEQ_TO_SN(sc), hdr->seq_ctrl); + IEEE80211_SEQ_TO_SN(sc), hdr->seq_ctrl); return -1; } D_TX_REPLY("AGG Frame i=%d idx %d seq=%d\n", i, idx, - SEQ_TO_SN(sc)); + IEEE80211_SEQ_TO_SN(sc)); sh = idx - start; if (sh > 64) { diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 96f2025d936..73bd3ef316c 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -541,10 +541,6 @@ struct il_frame { struct list_head list; }; -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define 
SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) - enum { CMD_SYNC = 0, CMD_SIZE_NORMAL = 0, diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c index 6aec2df3bb2..d499a0366fa 100644 --- a/drivers/net/wireless/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/iwlwifi/dvm/tx.c @@ -418,7 +418,8 @@ int iwlagn_tx_skb(struct iwl_priv *priv, " Tx flags = 0x%08x, agg.state = %d", info->flags, tid_data->agg.state); IWL_ERR(priv, "sta_id = %d, tid = %d seq_num = %d", - sta_id, tid, SEQ_TO_SN(tid_data->seq_number)); + sta_id, tid, + IEEE80211_SEQ_TO_SN(tid_data->seq_number)); goto drop_unlock_sta; } @@ -569,7 +570,7 @@ int iwlagn_tx_agg_stop(struct iwl_priv *priv, struct ieee80211_vif *vif, return 0; } - tid_data->agg.ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->agg.ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); /* There are still packets for this RA / TID in the HW */ if (!test_bit(txq_id, priv->agg_q_alloc)) { @@ -651,7 +652,7 @@ int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif, spin_lock_bh(&priv->sta_lock); tid_data = &priv->tid_data[sta_id][tid]; - tid_data->agg.ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->agg.ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->agg.txq_id = txq_id; *ssn = tid_data->agg.ssn; @@ -911,7 +912,7 @@ static void iwlagn_count_agg_tx_err_status(struct iwl_priv *priv, u16 status) static inline u32 iwlagn_get_scd_ssn(struct iwlagn_tx_resp *tx_resp) { return le32_to_cpup((__le32 *)&tx_resp->status + - tx_resp->frame_count) & MAX_SN; + tx_resp->frame_count) & IEEE80211_MAX_SN; } static void iwl_rx_reply_tx_agg(struct iwl_priv *priv, @@ -1148,7 +1149,7 @@ int iwlagn_rx_reply_tx(struct iwl_priv *priv, struct iwl_rx_cmd_buffer *rxb, if (tx_resp->frame_count == 1) { u16 next_reclaimed = le16_to_cpu(tx_resp->seq_ctl); - next_reclaimed = SEQ_TO_SN(next_reclaimed + 0x10); + next_reclaimed = IEEE80211_SEQ_TO_SN(next_reclaimed + 0x10); if (is_agg) { /* If this is an aggregation queue, we can rely on the diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h index 8c7bec6b9a0..00bdc5b00af 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/iwlwifi/iwl-trans.h @@ -114,9 +114,6 @@ * completely agnostic to these differences. * The transport does provide helper functionnality (i.e. 
SYNC / ASYNC mode), */ -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) #define SEQ_TO_QUEUE(s) (((s) >> 8) & 0x1f) #define QUEUE_TO_SEQ(q) (((q) & 0x1f) << 8) #define SEQ_TO_INDEX(s) ((s) & 0xff) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 861a7f9f8e7..52aecf20d0d 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -686,7 +686,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, spin_lock_bh(&mvmsta->lock); tid_data = &mvmsta->tid_data[tid]; - tid_data->ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); tid_data->txq_id = txq_id; *ssn = tid_data->ssn; @@ -779,7 +779,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, switch (tid_data->state) { case IWL_AGG_ON: - tid_data->ssn = SEQ_TO_SN(tid_data->seq_number); + tid_data->ssn = IEEE80211_SEQ_TO_SN(tid_data->seq_number); IWL_DEBUG_TX_QUEUES(mvm, "ssn = %d, next_recl = %d\n", diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 6b67ce3f679..56df249b215 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -641,7 +641,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, next_reclaimed = ssn; } else { /* The next packet to be reclaimed is the one after this one */ - next_reclaimed = SEQ_TO_SN(seq_ctl + 0x10); + next_reclaimed = IEEE80211_SEQ_TO_SN(seq_ctl + 0x10); } IWL_DEBUG_TX_REPLY(mvm, diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 8e9e3212fe7..ad7441dfa6f 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1581,7 +1581,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, * Check here that the packets are in the right place on the ring. 
*/ #ifdef CONFIG_IWLWIFI_DEBUG - wifi_seq = SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); WARN_ONCE((iwl_read_prph(trans, SCD_AGGR_SEL) & BIT(txq_id)) && ((wifi_seq & 0xff) != q->write_ptr), "Q: %d WiFi Seq %d tfdNum %d", diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index f13258a8d99..c3eff32acf6 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2127,9 +2127,6 @@ value to host byte ordering.*/ #define WLAN_FC_GET_TYPE(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) #define WLAN_FC_GET_STYPE(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) #define WLAN_FC_MORE_DATA(fc) (le16_to_cpu(fc) & IEEE80211_FCTL_MOREDATA) -#define SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -#define SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) -#define MAX_SN ((IEEE80211_SCTL_SEQ) >> 4) #define RT_RF_OFF_LEVL_ASPM BIT(0) /*PCI ASPM */ #define RT_RF_OFF_LEVL_CLK_REQ BIT(1) /*PCI clock request */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7e24fe0cfbc..a0c550fb65a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -113,6 +113,34 @@ #define IEEE80211_CTL_EXT_SSW_FBACK 0x9000 #define IEEE80211_CTL_EXT_SSW_ACK 0xa000 + +#define IEEE80211_SN_MASK ((IEEE80211_SCTL_SEQ) >> 4) +#define IEEE80211_MAX_SN IEEE80211_SN_MASK +#define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) + +static inline int ieee80211_sn_less(u16 sn1, u16 sn2) +{ + return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); +} + +static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) +{ + return (sn1 + sn2) & IEEE80211_SN_MASK; +} + +static inline u16 ieee80211_sn_inc(u16 sn) +{ + return ieee80211_sn_add(sn, 1); +} + +static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) +{ + return (sn1 - sn2) & IEEE80211_SN_MASK; +} + +#define IEEE80211_SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) +#define IEEE80211_SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) + /* miscellaneous IEEE 802.11 constants */ #define IEEE80211_MAX_FRAG_THRESHOLD 2352 #define IEEE80211_MAX_RTS_THRESHOLD 2353 diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index acf006f2d61..1f940e2b6f2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK 0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ - return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ - return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ - return (sq1 - sq2) & SEQ_MASK; -} - static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, int index, @@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, __skb_queue_tail(frames, skb); no_frame: - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, @@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata lockdep_assert_held(&tid_agg_rx->reorder_lock); - while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + while (ieee80211_sn_less(tid_agg_rx->head_seq_num, 
head_seq_num)) { + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); @@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, * Increment the head seq# also for the skipped slots. */ tid_agg_rx->head_seq_num = - (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; + (tid_agg_rx->head_seq_num + + skipped) & IEEE80211_SN_MASK; skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % + tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = tid_agg_rx->head_seq_num; /* frame with out of date sequence number */ - if (seq_less(mpdu_seq_num, head_seq_num)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); goto out; } @@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * If frame the sequence number exceeds our buffering window * size release some previous frames to make room for this one. 
*/ - if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { - head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); + if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { + head_seq_num = ieee80211_sn_inc( + ieee80211_sn_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, head_seq_num, frames); @@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata /* Now the new frame is always in the range of the reordering buffer */ - index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = ieee80211_sn_sub(mpdu_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = + ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } -- cgit v1.2.3-70-g09d2 From b8a31c9a5afff257cc5dd637cda5fef03e12d67b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 22 Feb 2013 17:28:49 +0100 Subject: ieee80211: mark 802.11 related structs as being 2-byte aligned Regardless of what header features they use, or if they align the IP header or not, 802.11 packets from all drivers guarantee a 2-byte alignment (and there's a debug WARN_ON in case they don't). Annotate packet structs with __aligned(2) to allow the compiler to use 16-bit load/store operations on platforms with extremely inefficient unaligned access (e.g. MIPS). This reduces code size and improves performance on affected platforms and causes no binary code change on others. 
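To make the annotation concrete, a minimal sketch of the pattern (the struct name is hypothetical, not taken from the patch):

	/* Sketch: __packed removes padding, while __aligned(2) promises the
	 * struct never starts at an odd address, so the compiler may access
	 * the __le16 fields with ordinary 16-bit loads/stores instead of
	 * byte-wise accesses on strict-alignment CPUs such as MIPS.
	 */
	struct example_2addr_hdr {
		__le16 frame_control;
		__le16 duration_id;
		u8 addr1[6];
		u8 addr2[6];
	} __packed __aligned(2);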
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a0c550fb65a..6e352c31fee 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -213,7 +213,7 @@ struct ieee80211_hdr { u8 addr3[6]; __le16 seq_ctrl; u8 addr4[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_hdr_3addr { __le16 frame_control; @@ -222,7 +222,7 @@ struct ieee80211_hdr_3addr { u8 addr2[6]; u8 addr3[6]; __le16 seq_ctrl; -} __packed; +} __packed __aligned(2); struct ieee80211_qos_hdr { __le16 frame_control; @@ -232,7 +232,7 @@ struct ieee80211_qos_hdr { u8 addr3[6]; __le16 seq_ctrl; __le16 qos_ctrl; -} __packed; +} __packed __aligned(2); /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set @@ -609,7 +609,7 @@ struct ieee80211s_hdr { __le32 seqnum; u8 eaddr1[6]; u8 eaddr2[6]; -} __packed; +} __packed __aligned(2); /* Mesh flags */ #define MESH_FLAGS_AE_A4 0x1 @@ -903,7 +903,7 @@ struct ieee80211_mgmt { } u; } __packed action; } u; -} __packed; +} __packed __aligned(2); /* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 @@ -934,20 +934,20 @@ struct ieee80211_rts { __le16 duration; u8 ra[6]; u8 ta[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_cts { __le16 frame_control; __le16 duration; u8 ra[6]; -} __packed; +} __packed __aligned(2); struct ieee80211_pspoll { __le16 frame_control; __le16 aid; u8 bssid[6]; u8 ta[6]; -} __packed; +} __packed __aligned(2); /* TDLS */ -- cgit v1.2.3-70-g09d2 From c8bb93f5f5d478a01db66127844d1d2dd30abec7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Feb 2013 17:26:44 +0100 Subject: wireless: remove unused VHT MCS defines There's an enum with the same values (but slightly different names except for NOT_SUPPORTED) that is actually used, so remove the defines. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6e352c31fee..35c1f96d936 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1318,11 +1318,6 @@ struct ieee80211_vht_operation { } __packed; -#define IEEE80211_VHT_MCS_ZERO_TO_SEVEN_SUPPORT 0 -#define IEEE80211_VHT_MCS_ZERO_TO_EIGHT_SUPPORT 1 -#define IEEE80211_VHT_MCS_ZERO_TO_NINE_SUPPORT 2 -#define IEEE80211_VHT_MCS_NOT_SUPPORTED 3 - /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 -- cgit v1.2.3-70-g09d2 From ee2aca343c9aa64d277a75a5df043299dc84cfd9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Feb 2013 17:36:01 +0100 Subject: cfg80211: add ability to override VHT capabilities For testing it's sometimes useful to be able to override certain VHT capability advertisement, add the ability to do that in cfg80211. 
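A sketch of how a driver might opt in to the override mechanism added below (hypothetical driver code; the particular capability bit is an arbitrary example, not mandated by the patch):

	/* Sketch: the driver publishes which VHT capability bits userspace is
	 * allowed to override; bits outside this mask are dropped by
	 * cfg80211_oper_and_vht_capa() when building the association request.
	 */
	static const struct ieee80211_vht_cap example_vht_mod_mask = {
		.vht_cap_info = cpu_to_le32(IEEE80211_VHT_CAP_SHORT_GI_80),
	};

	static void example_setup_vht_override(struct wiphy *wiphy)
	{
		wiphy->vht_capa_mod_mask = &example_vht_mod_mask;
	}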
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++++++++++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.h | 10 ++++++++-- net/wireless/mlme.c | 35 ++++++++++++++++++++++++++++++--- net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- net/wireless/sme.c | 4 +++- 6 files changed, 103 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed2b08da3b9..73a523901c7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1430,9 +1430,11 @@ struct cfg80211_auth_request { * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association. * * @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n) + * @ASSOC_REQ_DISABLE_VHT: Disable VHT */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), + ASSOC_REQ_DISABLE_VHT = BIT(1), }; /** @@ -1454,6 +1456,8 @@ enum cfg80211_assoc_req_flags { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @vht_capa: VHT capability override + * @vht_capa_mask: VHT capability mask indicating which fields to use */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1464,6 +1468,7 @@ struct cfg80211_assoc_request { u32 flags; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa, vht_capa_mask; }; /** @@ -1574,6 +1579,8 @@ struct cfg80211_ibss_params { * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. + * @vht_capa: VHT Capability overrides + * @vht_capa_mask: The bits of vht_capa which are to be used. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -1592,6 +1599,8 @@ struct cfg80211_connect_params { int bg_scan_period; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa; + struct ieee80211_vht_cap vht_capa_mask; }; /** @@ -2516,6 +2525,8 @@ struct wiphy_wowlan_support { * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. + * @vht_capa_mod_mask: Specify what VHT capabilities can be over-ridden. + * If null, then none can be over-ridden. * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. 
@@ -2624,6 +2635,7 @@ struct wiphy { struct dentry *debugfsdir; const struct ieee80211_ht_cap *ht_capa_mod_mask; + const struct ieee80211_vht_cap *vht_capa_mod_mask; #ifdef CONFIG_NET_NS /* the network namespace this phy lives in currently */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9844c10a299..2c3e8836003 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1685,6 +1685,9 @@ enum nl80211_attrs { NL80211_ATTR_PROTOCOL_FEATURES, NL80211_ATTR_SPLIT_WIPHY_DUMP, + NL80211_ATTR_DISABLE_VHT, + NL80211_ATTR_VHT_CAPABILITY_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.h b/net/wireless/core.h index 3aec0e429d8..c2f94f22bb2 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -335,7 +335,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *prev_bssid, @@ -343,7 +345,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -375,6 +379,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5a97ce6d283..c82adfee769 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -343,6 +343,23 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, p1[i] &= p2[i]; } +/* Do a logical ht_capa &= ht_capa_mask. 
*/ +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask) +{ + int i; + u8 *p1, *p2; + if (!vht_capa_mask) { + memset(vht_capa, 0, sizeof(*vht_capa)); + return; + } + + p1 = (u8*)(vht_capa); + p2 = (u8*)(vht_capa_mask); + for (i = 0; i < sizeof(*vht_capa); i++) + p1[i] &= p2[i]; +} + int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -351,7 +368,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_assoc_request req; @@ -388,6 +407,13 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, sizeof(req.ht_capa_mask)); cfg80211_oper_and_ht_capa(&req.ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); + if (vht_capa) + memcpy(&req.vht_capa, vht_capa, sizeof(req.vht_capa)); + if (vht_capa_mask) + memcpy(&req.vht_capa_mask, vht_capa_mask, + sizeof(req.vht_capa_mask)); + cfg80211_oper_and_vht_capa(&req.vht_capa_mask, + rdev->wiphy.vht_capa_mod_mask); req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); @@ -422,7 +448,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt, u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_vht_cap *vht_capa, + struct ieee80211_vht_cap *vht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -431,7 +459,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, wdev_lock(wdev); err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, crypt, - assoc_flags, ht_capa, ht_capa_mask); + assoc_flags, ht_capa, ht_capa_mask, + vht_capa, vht_capa_mask); wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0e5176784b4..6a5893f5e48 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -371,6 +371,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, + [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, + [NL80211_ATTR_VHT_CAPABILITY_MASK] = { + .len = NL80211_VHT_CAPABILITY_LEN, + }, }; /* policy for the key attributes */ @@ -1522,6 +1526,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, dev->wiphy.extended_capabilities_mask))) goto nla_put_failure; + if (dev->wiphy.vht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, + sizeof(*dev->wiphy.vht_capa_mod_mask), + dev->wiphy.vht_capa_mod_mask)) + goto nla_put_failure; + /* done */ *split_start = 0; break; @@ -5982,6 +5992,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) u32 flags = 0; struct ieee80211_ht_cap *ht_capa = NULL; struct ieee80211_ht_cap *ht_capa_mask = NULL; + struct ieee80211_vht_cap *vht_capa = NULL; + struct ieee80211_vht_cap 
*vht_capa_mask = NULL; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -6038,12 +6050,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + vht_capa_mask = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!vht_capa_mask) + return -EINVAL; + vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + } + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, - &crypto, flags, ht_capa, - ht_capa_mask); + &crypto, flags, ht_capa, ht_capa_mask, + vht_capa, vht_capa_mask); return err; } @@ -6623,6 +6648,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa)); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + connect.flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&connect.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(connect.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) { + kfree(connkeys); + return -EINVAL; + } + memcpy(&connect.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(connect.vht_capa)); + } + err = cfg80211_connect(rdev, dev, &connect, connkeys); if (err) kfree(connkeys); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f432bd3755b..7da118c034f 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -195,7 +195,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params->mfp != NL80211_MFP_NO, ¶ms->crypto, params->flags, ¶ms->ht_capa, - ¶ms->ht_capa_mask); + ¶ms->ht_capa_mask, + ¶ms->vht_capa, + ¶ms->vht_capa_mask); if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, -- cgit v1.2.3-70-g09d2 From d339d5ca8eee34f3c70386cf2545edc53e546a13 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 12 Feb 2013 09:34:13 +0200 Subject: mac80211: Allow drivers to differentiate between ROC types Some devices can handle remain on channel requests differently based on the request type/priority. Add support to differentiate between different ROC types, i.e., indicate that the ROC is required for sending managment frames. 
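A sketch of a driver callback using the new type argument introduced below (hypothetical driver code; example_start_time_event() is an assumed internal helper, not a real API):

	static int example_remain_on_channel(struct ieee80211_hw *hw,
					     struct ieee80211_vif *vif,
					     struct ieee80211_channel *chan,
					     int duration,
					     enum ieee80211_roc_type type)
	{
		/* Management-TX ROCs may preempt lower-priority activity,
		 * while a plain listen request can be deferred.
		 */
		bool high_prio = (type == IEEE80211_ROC_TYPE_MGMT_TX);

		return example_start_time_event(hw->priv, chan->hw_value,
						duration, high_prio);
	}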
Signed-off-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 3 ++- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 7 ++++--- drivers/net/wireless/mac80211_hwsim.c | 3 ++- drivers/net/wireless/ti/wlcore/main.c | 3 ++- include/net/mac80211.h | 21 ++++++++++++++++++++- net/mac80211/cfg.c | 21 +++++++++++++++------ net/mac80211/driver-ops.h | 7 ++++--- net/mac80211/ieee80211_i.h | 1 + net/mac80211/offchannel.c | 2 +- net/mac80211/trace.h | 11 +++++++---- 10 files changed, 58 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 323e4a33fca..c7cd2dffa5c 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1137,7 +1137,8 @@ done: static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - int duration) + int duration, + enum ieee80211_roc_type type) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_PAN]; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index e8264e11b12..23460f4a4c7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1081,7 +1081,8 @@ static void iwl_mvm_mac_update_tkip_key(struct ieee80211_hw *hw, static int iwl_mvm_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - int duration) + int duration, + enum ieee80211_roc_type type) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct cfg80211_chan_def chandef; @@ -1092,8 +1093,8 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw, return -EINVAL; } - IWL_DEBUG_MAC80211(mvm, "enter (%d, %d)\n", channel->hw_value, - duration); + IWL_DEBUG_MAC80211(mvm, "enter (%d, %d, %d)\n", channel->hw_value, + duration, type); mutex_lock(&mvm->mutex); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index cffdf4fbf16..7490c4fc717 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1535,7 +1535,8 @@ static void hw_roc_done(struct work_struct *work) static int mac80211_hwsim_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration) + int duration, + enum ieee80211_roc_type type) { struct mac80211_hwsim_data *hwsim = hw->priv; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 2c2ff3e1f84..d7e306333f6 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4956,7 +4956,8 @@ static void wlcore_op_flush(struct ieee80211_hw *hw, bool drop) static int wlcore_op_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration) + int duration, + enum ieee80211_roc_type type) { struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); struct wl1271 *wl = hw->priv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f7eba1300d8..9b0d342c067 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2134,6 +2134,24 @@ enum ieee80211_rate_control_changed { IEEE80211_RC_NSS_CHANGED = BIT(3), }; +/** + * enum ieee80211_roc_type - remain on channel type + * + * With the support for multi channel contexts and multi channel operations, + * remain on channel 
operations might be limited/deferred/aborted by other + * flows/operations which have higher priority (and vise versa). + * Specifying the ROC type can be used by devices to prioritize the ROC + * operations compared to other operations/flows. + * + * @IEEE80211_ROC_TYPE_NORMAL: There are no special requirements for this ROC. + * @IEEE80211_ROC_TYPE_MGMT_TX: The remain on channel request is required + * for sending managment frames offchannel. + */ +enum ieee80211_roc_type { + IEEE80211_ROC_TYPE_NORMAL = 0, + IEEE80211_ROC_TYPE_MGMT_TX, +}; + /** * struct ieee80211_ops - callbacks from mac80211 to the driver * @@ -2687,7 +2705,8 @@ struct ieee80211_ops { int (*remain_on_channel)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - int duration); + int duration, + enum ieee80211_roc_type type); int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx); void (*get_ringparam)(struct ieee80211_hw *hw, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c115f82c037..f9cbdc29946 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2410,7 +2410,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, unsigned int duration, u64 *cookie, - struct sk_buff *txskb) + struct sk_buff *txskb, + enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; bool queued = false; @@ -2429,6 +2430,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; + roc->type = type; roc->mgmt_tx_cookie = (unsigned long)txskb; roc->sdata = sdata; INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); @@ -2459,7 +2461,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, duration); + ret = drv_remain_on_channel(local, sdata, channel, duration, type); if (ret) { kfree(roc); return ret; @@ -2478,10 +2480,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * * If it hasn't started yet, just increase the duration * and add the new one to the list of dependents. + * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. */ if (!tmp->started) { list_add_tail(&roc->list, &tmp->dependents); tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); queued = true; break; } @@ -2493,16 +2498,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled - * when the the master one begins. If it has begun, + * when the master one begins. If it has begun, * check that there's still a minimum time left and * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with a + * add it to the list directly after this one with * a reduced time so we'll ask the driver to execute * it right after finishing the previous one, in the * hope that it'll also be executed right afterwards, * effectively extending the old one. * If there's no minimum time left, just add it to the * normal list. + * TODO: the ROC type is ignored here, assuming that it + * is better to immediately use the current ROC. 
*/ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); @@ -2596,7 +2603,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, mutex_lock(&local->mtx); ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL); + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); mutex_unlock(&local->mtx); return ret; @@ -2829,7 +2837,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* This will handle all kinds of coalescing and immediate TX */ ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb); + wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); out_unlock: diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ee56d0779d8..832acea4a5c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -787,15 +787,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration) + unsigned int duration, + enum ieee80211_roc_type type) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, duration); + chan, duration, type); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8da53a06730..2518f0429b8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -315,6 +315,7 @@ struct ieee80211_roc_work { u32 duration, req_duration; struct sk_buff *frame; u64 cookie, mgmt_tx_cookie; + enum ieee80211_roc_type type; }; /* flags used in struct ieee80211_if_managed.flags */ diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cc79b4a2e82..db547fceaeb 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration); + duration, roc->type); roc->started = true; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3d7cd2a0582..e7db2b804e0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1042,15 +1042,17 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration), + unsigned int duration, + enum ieee80211_roc_type type), - TP_ARGS(local, sdata, chan, duration), + TP_ARGS(local, sdata, chan, duration, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) __field(unsigned int, duration) + __field(u32, type) ), TP_fast_assign( @@ -1058,12 +1060,13 @@ TRACE_EVENT(drv_remain_on_channel, VIF_ASSIGN; __entry->center_freq = chan->center_freq; __entry->duration = duration; + __entry->type = type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, - __entry->center_freq, __entry->duration + __entry->center_freq, __entry->duration, __entry->type ) ); -- cgit v1.2.3-70-g09d2 From 355199e02b831fd4f652c34d6c7673d973da1369 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 27 Feb 2013 17:14:27 +0200 Subject: cfg80211: Extend support for IEEE 802.11r Fast BSS Transition Add NL80211_CMD_UPDATE_FT_IES to support 
update of FT IEs to the WLAN driver and NL80211_CMD_FT_EVENT to send FT events from the WLAN driver. This will carry the target AP's MAC address along with the relevant Information Elements. This event is used to report received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). These changes allow FT to be supported with drivers that use an internal SME instead of user space option (like FT implementation in wpa_supplicant with mac80211-based drivers). Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 ++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 19 +++++++++++ net/wireless/nl80211.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 13 ++++++++ net/wireless/trace.h | 46 +++++++++++++++++++++++++++ 5 files changed, 195 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 73a523901c7..dfef0d5b5d3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1762,6 +1762,21 @@ struct cfg80211_gtk_rekey_data { u8 replay_ctr[NL80211_REPLAY_CTR_LEN]; }; +/** + * struct cfg80211_update_ft_ies_params - FT IE Information + * + * This structure provides information needed to update the fast transition IE + * + * @md: The Mobility Domain ID, 2 Octet value + * @ie: Fast Transition IEs + * @ie_len: Length of ft_ie in octets + */ +struct cfg80211_update_ft_ies_params { + u16 md; + const u8 *ie; + size_t ie_len; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2208,6 +2223,8 @@ struct cfg80211_ops { int (*start_radar_detection)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef); + int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie); }; /* @@ -4044,6 +4061,30 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); +/** + * struct cfg80211_ft_event - FT Information Elements + * @ies: FT IEs + * @ies_len: length of the FT IE in bytes + * @target_ap: target AP's MAC address + * @ric_ies: RIC IE + * @ric_ies_len: length of the RIC IE in bytes + */ +struct cfg80211_ft_event_params { + const u8 *ies; + size_t ies_len; + const u8 *target_ap; + const u8 *ric_ies; + size_t ric_ies_len; +}; + +/** + * cfg80211_ft_event - notify userspace about FT IE and RIC IE + * @netdev: network device + * @ft_event: IE information + */ +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event); + /** * cfg80211_get_p2p_attr - find and copy a P2P attribute from IE buffer * @ies: the input IE buffer diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2c3e8836003..2d0cff57ff8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -629,6 +629,14 @@ * i.e. features for the nl80211 protocol rather than device features. * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap. * + * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition + * Information Element to the WLAN driver + * + * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver + * to the supplicant. This will carry the target AP's MAC address along + * with the relevant Information Elements. This event is used to report + * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -785,6 +793,9 @@ enum nl80211_commands { NL80211_CMD_GET_PROTOCOL_FEATURES, + NL80211_CMD_UPDATE_FT_IES, + NL80211_CMD_FT_EVENT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1396,6 +1407,11 @@ enum nl80211_commands { * receiving the data for a single wiphy split across multiple * messages, given with wiphy dump message * + * @NL80211_ATTR_MDID: Mobility Domain Identifier + * + * @NL80211_ATTR_IE_RIC: Resource Information Container Information + * Element + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1688,6 +1704,9 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_VHT, NL80211_ATTR_VHT_CAPABILITY_MASK, + NL80211_ATTR_MDID, + NL80211_ATTR_IE_RIC, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a8bd453d22b..08de0c6035f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -375,6 +375,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_VHT_CAPABILITY_MASK] = { .len = NL80211_VHT_CAPABILITY_LEN, }, + [NL80211_ATTR_MDID] = { .type = NLA_U16 }, + [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, }; /* policy for the key attributes */ @@ -8160,6 +8163,27 @@ static int nl80211_get_protocol_features(struct sk_buff *skb, return -ENOBUFS; } +static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_update_ft_ies_params ft_params; + struct net_device *dev = info->user_ptr[1]; + + if (!rdev->ops->update_ft_ies) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MDID] || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + memset(&ft_params, 0, sizeof(ft_params)); + ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); + ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + + return rdev_update_ft_ies(rdev, dev, &ft_params); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8841,6 +8865,14 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_get_protocol_features, .policy = nl80211_policy, }, + { + .cmd = NL80211_CMD_UPDATE_FT_IES, + .doit = nl80211_update_ft_ies, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -10542,6 +10574,50 @@ static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event) +{ + struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_ft_event(wiphy, netdev, ft_event); + + if (!ft_event->target_ap) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + nla_put(msg, 
NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); + if (ft_event->ies) + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); + if (ft_event->ric_ies) + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_ft_event); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 422d38291d6..8c8b26f574e 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,4 +887,17 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie) +{ + int ret; + + trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie); + ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b7a531380e1..ccadef2106a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1785,6 +1785,26 @@ TRACE_EVENT(rdev_set_mac_acl, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) ); +TRACE_EVENT(rdev_update_ft_ies, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_ft_ies_params *ftie), + TP_ARGS(wiphy, netdev, ftie), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u16, md) + __dynamic_array(u8, ie, ftie->ie_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->md = ftie->md; + memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2413,6 +2433,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); +TRACE_EVENT(cfg80211_ft_event, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event), + TP_ARGS(wiphy, netdev, ft_event), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __dynamic_array(u8, ies, ft_event->ies_len) + MAC_ENTRY(target_ap) + __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + if (ft_event->ies) + memcpy(__get_dynamic_array(ies), ft_event->ies, + ft_event->ies_len); + MAC_ASSIGN(target_ap, ft_event->target_ap); + if (ft_event->ric_ies) + memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, + ft_event->ric_ies_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-70-g09d2 From a87121051ce80831a302c67286119013104f7a5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 23 Feb 2013 00:22:44 +0100 Subject: mac80211: remove IEEE80211_KEY_FLAG_WMM_STA There's no driver using this flag, so it seems that all drivers support HW crypto with WMM or don't support it at all. Remove the flag and code setting it. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 --- net/mac80211/key.c | 26 -------------------------- 2 files changed, 29 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9b0d342c067..421c3ac8c52 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1101,8 +1101,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * These flags are used for communication about keys between the driver * and mac80211, with the @flags parameter of &struct ieee80211_key_conf. * - * @IEEE80211_KEY_FLAG_WMM_STA: Set by mac80211, this flag indicates - * that the STA this key will be used with could be using QoS. * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the * driver to indicate that it requires IV generation for this * particular key. @@ -1127,7 +1125,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. */ enum ieee80211_key_flags { - IEEE80211_KEY_FLAG_WMM_STA = 1<<0, IEEE80211_KEY_FLAG_GENERATE_IV = 1<<1, IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2, IEEE80211_KEY_FLAG_PAIRWISE = 1<<3, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index df81b178c59..6eb4888a70e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -440,32 +440,6 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; - if (sta) { - /* - * some hardware cannot handle TKIP with QoS, so - * we indicate whether QoS could be in use. - */ - if (test_sta_flag(sta, WLAN_STA_WME)) - key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; - } else { - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - struct sta_info *ap; - - /* - * We're getting a sta pointer in, so must be under - * appropriate locking for sta_info_get(). - */ - - /* same here, the AP could be using QoS */ - ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); - if (ap) { - if (test_sta_flag(ap, WLAN_STA_WME)) - key->conf.flags |= - IEEE80211_KEY_FLAG_WMM_STA; - } - } - } - mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) -- cgit v1.2.3-70-g09d2 From 55d942f4246c79a8f3f17f92c224e641c5c26125 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2013 13:07:48 +0100 Subject: mac80211: restrict peer's VHT capabilities to own Implement restricting peer VHT capabilities to the device's own capabilities. This is useful when a single driver supports more than one device and the devices have different capabilities (often they will differ in the number of spatial streams), but in particular is also necessary for VHT capability overrides to work correctly -- otherwise it'd be possible to e.g. advertise, due to overrides, that TX-STBC is not supported, but then still use it to TX to the AP because it supports RX-STBC. 
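For illustration, the per-spatial-stream part of this restriction is an intersection of the peer's MCS map with our own (the peer's TX map is paired with our RX map and vice versa). The following is a minimal standalone sketch under that reading -- the helper name is hypothetical and it uses plain host-order integers, whereas the kernel code in the patch below works in place on the little-endian rx_mcs_map/tx_mcs_map fields:

    #include <stdint.h>

    #define MCS_NOT_SUPPORTED 3    /* mirrors IEEE80211_VHT_MCS_NOT_SUPPORTED */

    /*
     * A VHT MCS map packs eight 2-bit entries, one per spatial stream:
     * 0..2 select the highest usable MCS (7, 8 or 9), 3 means the stream
     * is not supported at all.
     */
    static uint16_t intersect_vht_mcs_map(uint16_t peer_map, uint16_t own_map)
    {
        uint16_t out = 0;
        int ss;

        for (ss = 0; ss < 8; ss++) {
            uint16_t peer = (peer_map >> (ss * 2)) & 3;
            uint16_t own = (own_map >> (ss * 2)) & 3;
            uint16_t val;

            /*
             * A stream is usable only if both sides support it, and then
             * only up to the lower of the two MCS caps.
             */
            if (peer == MCS_NOT_SUPPORTED || own == MCS_NOT_SUPPORTED)
                val = MCS_NOT_SUPPORTED;
            else
                val = peer < own ? peer : own;

            out |= val << (ss * 2);
        }
        return out;
    }

Besides the MCS maps, the patch also masks the capability bits (supported channel width, short GI, beamforming and STBC) against what the local device advertises, which is what prevents the RX-STBC/TX-STBC mismatch described above.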
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +- include/net/mac80211.h | 5 +- net/mac80211/vht.c | 114 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 35c1f96d936..4cf0c9e4dd9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1333,10 +1333,11 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RXSTBC_2 0x00000200 #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 +#define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX 0x00006000 -#define IEEE80211_VHT_CAP_SOUNDING_DIMENTION_MAX 0x00030000 +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00030000 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 421c3ac8c52..cdd7cea1fd4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1228,9 +1228,8 @@ enum ieee80211_sta_rx_bandwidth { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities - * @vht_cap: VHT capabilities of this STA; Not restricting any capabilities - * of remote STA. Taking as is. + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities * @wme: indicates whether the STA supports WME. Only valid during AP-mode. * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index cacc1c74556..171344d4eb7 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -118,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap own_cap; + u32 cap_info, i; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -133,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); + own_cap = sband->vht_cap; + /* + * If user has specified capability overrides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. 
+ */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + + /* take some capabilities as-is */ + cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); + vht_cap->cap = cap_info; + vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_VHT_TXOP_PS | + IEEE80211_VHT_CAP_HTC_VHT | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + + /* and some based on our own capabilities */ + switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; + break; + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + break; + default: + /* nothing */ + break; + } + + /* symmetric capabilities */ + vht_cap->cap |= cap_info & own_cap.cap & + (IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160); + + /* remaining ones */ + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + vht_cap->cap |= cap_info & + (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); + } + + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + + if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. 
*/ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + /* but also restrict MCSes */ + for (i = 0; i < 8; i++) { + u16 own_rx, own_tx, peer_rx, peer_tx; + + own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); + own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); + own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + vht_cap->vht_mcs.rx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + + vht_cap->vht_mcs.tx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); + } + + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: -- cgit v1.2.3-70-g09d2 From bb2798d45fc0575f5d08c0bb7baf4d5d5e8cc0c3 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 4 Mar 2013 13:06:10 -0800 Subject: nl80211: explicit userspace MPM Secure mesh had the implicit requirement that the Mesh Peering Management entity be in userspace. However userspace might want to implement an open MPM as well, so specify a mesh setup parameter to indicate this. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 25 ++++++++++++++++++------- net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 8 ++++++++ 4 files changed, 29 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dfef0d5b5d3..69b2b2631b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1151,6 +1151,7 @@ struct mesh_config { * @ie_len: length of vendor information elements * @is_authenticated: this mesh requires authentication * @is_secure: this mesh uses security + * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] @@ -1168,6 +1169,7 @@ struct mesh_setup { u8 ie_len; bool is_authenticated; bool is_secure; + bool user_mpm; u8 dtim_period; u16 beacon_interval; int mcast_rate[IEEE80211_NUM_BANDS]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2d0cff57ff8..8134c6a96f5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -513,9 +513,11 @@ * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a * beacon or probe response from a compatible mesh peer. This is only * sent while no station information (sta_info) exists for the new peer - * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH is set. 
On - * reception of this notification, userspace may decide to create a new - * station (@NL80211_CMD_NEW_STATION). To stop this notification from + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH, + * @NL80211_MESH_SETUP_USERSPACE_AMPE, or + * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this + * notification, userspace may decide to create a new station + * (@NL80211_CMD_NEW_STATION). To stop this notification from * reoccurring, the userspace authentication daemon may want to create the * new station with the AUTHENTICATED flag unset and maybe change it later * depending on the authentication result. @@ -1199,10 +1201,10 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. - * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as - * defined in &enum nl80211_plink_state. Used when userspace is - * driving the peer link management state machine. - * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in + * &enum nl80211_plink_state. Used when userspace is driving the peer link + * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or + * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled. * * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy * capabilities, the supported WoWLAN triggers @@ -2612,6 +2614,9 @@ enum nl80211_meshconf_params { * vendor specific synchronization method or disable it to use the default * neighbor offset synchronization * + * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will + * implement an MPM which handles peer allocation and state. + * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use @@ -2624,6 +2629,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_USERSPACE_AUTH, NL80211_MESH_SETUP_USERSPACE_AMPE, NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC, + NL80211_MESH_SETUP_USERSPACE_MPM, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, @@ -3526,6 +3532,10 @@ enum nl80211_ap_sme_features { * stations the authenticated/associated bits have to be set in the mask. * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits * (HT40, VHT 80/160 MHz) if this flag is set + * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh + * Peering Management entity which may be implemented by registering for + * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is + * still generated by the driver. 
*/ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3544,6 +3554,7 @@ enum nl80211_feature_flags { /* bit 13 is reserved */ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, + NL80211_FEATURE_USERSPACE_MPM = 1 << 16, }; /** diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 9688b249a80..0bb93f3061a 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .user_mpm = false, .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7469020175d..bdf39836d9d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4618,6 +4618,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, @@ -4756,6 +4757,7 @@ do { \ static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) @@ -4792,8 +4794,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && + !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) + return -EINVAL; + setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]); setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); + if (setup->is_secure) + setup->user_mpm = true; return 0; } -- cgit v1.2.3-70-g09d2 From eef941e6d6be8bce72b5c2963b69f948be4df7a7 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 4 Mar 2013 13:06:11 -0800 Subject: cfg80211: rename mesh station types The mesh station types used to refer to whether the station was secure or nonsecure. 
Really the salient information is whether it is managed by the kernel or userspace Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- net/mac80211/cfg.c | 4 ++-- net/wireless/nl80211.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69b2b2631b9..bdba9b61906 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -690,8 +690,8 @@ struct station_parameters { * supported/used) * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active * entry that is operating, has been marked authorized by userspace) - * @CFG80211_STA_MESH_PEER_NONSEC: peer on mesh interface (non-secured) - * @CFG80211_STA_MESH_PEER_SECURE: peer on mesh interface (secured) + * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) + * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, @@ -700,8 +700,8 @@ enum cfg80211_station_type { CFG80211_STA_IBSS, CFG80211_STA_TDLS_PEER_SETUP, CFG80211_STA_TDLS_PEER_ACTIVE, - CFG80211_STA_MESH_PEER_NONSEC, - CFG80211_STA_MESH_PEER_SECURE, + CFG80211_STA_MESH_PEER_KERNEL, + CFG80211_STA_MESH_PEER_USER, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9d708f9e246..6ac89e5c296 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1436,9 +1436,9 @@ static int ieee80211_change_station(struct wiphy *wiphy, switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) - statype = CFG80211_STA_MESH_PEER_SECURE; + statype = CFG80211_STA_MESH_PEER_USER; else - statype = CFG80211_STA_MESH_PEER_NONSEC; + statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bdf39836d9d..946b2e7acdf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3617,8 +3617,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); switch (statype) { - case CFG80211_STA_MESH_PEER_NONSEC: - case CFG80211_STA_MESH_PEER_SECURE: + case CFG80211_STA_MESH_PEER_KERNEL: + case CFG80211_STA_MESH_PEER_USER: /* * No ignoring the TDLS flag here -- the userspace mesh * code doesn't have the bug of including TDLS in the @@ -3720,11 +3720,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy, case CFG80211_STA_TDLS_PEER_ACTIVE: /* reject any changes */ return -EINVAL; - case CFG80211_STA_MESH_PEER_NONSEC: + case CFG80211_STA_MESH_PEER_KERNEL: if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) return -EINVAL; break; - case CFG80211_STA_MESH_PEER_SECURE: + case CFG80211_STA_MESH_PEER_USER: if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) return -EINVAL; break; -- cgit v1.2.3-70-g09d2 From d37bb18ae3a3fa7ef239aad533742a8b07eae15f Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 4 Mar 2013 13:06:13 -0800 Subject: nl80211: user_mpm overrides auto_open_plinks If the user requested a userspace MPM, automatically disable auto_open_plinks to fully disable the kernel MPM. 
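To make the userspace side concrete, here is a rough sketch of a mesh daemon joining a mesh while claiming the MPM, using the NL80211_MESH_SETUP_USERSPACE_MPM flag added in this series. This assumes the libnl-3 generic netlink helpers, elides error handling and reply parsing, and the helper name is made up; per the earlier patch the kernel rejects the flag with -EINVAL unless the driver advertises NL80211_FEATURE_USERSPACE_MPM, and with this patch it additionally clears auto_open_plinks:

    #include <string.h>
    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/nl80211.h>

    /* Join a mesh and tell the kernel that peering (the MPM) runs in userspace. */
    static int join_mesh_user_mpm(struct nl_sock *sk, int nl80211_id,
                                  int ifindex, const char *mesh_id)
    {
        struct nl_msg *msg = nlmsg_alloc();
        struct nlattr *setup;

        genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl80211_id, 0, 0,
                    NL80211_CMD_JOIN_MESH, 0);
        nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);
        nla_put(msg, NL80211_ATTR_MESH_ID, strlen(mesh_id), mesh_id);

        /* mesh setup parameters are nested under NL80211_ATTR_MESH_SETUP */
        setup = nla_nest_start(msg, NL80211_ATTR_MESH_SETUP);
        nla_put_flag(msg, NL80211_MESH_SETUP_USERSPACE_MPM);
        nla_nest_end(msg, setup);

        return nl_send_auto(sk, msg);    /* ACK/error handling omitted */
    }

The daemon would then drive peering itself, reacting to NL80211_CMD_NEW_PEER_CANDIDATE notifications and managing peers via NL80211_CMD_NEW_STATION and NL80211_ATTR_STA_PLINK_STATE, as described in the documentation updates earlier in this series.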
Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++-- net/wireless/nl80211.c | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8134c6a96f5..79da8710448 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2467,8 +2467,10 @@ enum nl80211_mesh_power_mode { * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh * point. * - * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically - * open peer links when we detect compatible mesh peers. + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open + * peer links when we detect compatible mesh peers. Disabled if + * @NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are + * set. * * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames * containing a PREQ that an MP can send to a particular destination (path diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 946b2e7acdf..f924d45af1b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7449,6 +7449,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (setup.user_mpm) + cfg.auto_open_plinks = false; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &setup.chandef); if (err) -- cgit v1.2.3-70-g09d2 From acbba0d0f88e2577b9d92b61b136d13f65831a52 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Mar 2013 01:31:12 +0000 Subject: team: introduce two default team_modeop functions and use them in modes No need to duplicate code for this. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/team/team.c | 19 ++++++++++++++++--- drivers/net/team/team_mode_broadcast.c | 14 ++------------ drivers/net/team/team_mode_roundrobin.c | 14 ++------------ include/linux/if_team.h | 5 ++++- 4 files changed, 24 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 05c5efe8459..ece70a4abbb 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -73,11 +73,24 @@ static int team_port_set_orig_dev_addr(struct team_port *port) return __set_port_dev_addr(port->dev, port->orig.dev_addr); } -int team_port_set_team_dev_addr(struct team_port *port) +static int team_port_set_team_dev_addr(struct team *team, + struct team_port *port) +{ + return __set_port_dev_addr(port->dev, team->dev->dev_addr); +} + +int team_modeop_port_enter(struct team *team, struct team_port *port) +{ + return team_port_set_team_dev_addr(team, port); +} +EXPORT_SYMBOL(team_modeop_port_enter); + +void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port) { - return __set_port_dev_addr(port->dev, port->team->dev->dev_addr); + team_port_set_team_dev_addr(team, port); } -EXPORT_SYMBOL(team_port_set_team_dev_addr); +EXPORT_SYMBOL(team_modeop_port_change_dev_addr); static void team_refresh_port_linkup(struct team_port *port) { diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index c5db428e73f..c366cd299c0 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -46,20 +46,10 @@ static bool bc_transmit(struct team *team, struct sk_buff *skb) return sum_ret; } -static int bc_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void bc_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops bc_mode_ops = { .transmit = bc_transmit, - .port_enter = bc_port_enter, - .port_change_dev_addr = bc_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode bc_mode = { diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 105135aa8f0..ed63a6bc66c 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -64,20 +64,10 @@ drop: return false; } -static int rr_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void rr_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops rr_mode_ops = { .transmit = rr_transmit, - .port_enter = rr_port_enter, - .port_change_dev_addr = rr_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode rr_mode = { diff --git a/include/linux/if_team.h b/include/linux/if_team.h index cfd21e3d550..3283def7448 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -112,6 +112,10 @@ struct team_mode_ops { void (*port_disabled)(struct team *team, struct team_port *port); }; +extern int team_modeop_port_enter(struct team *team, struct team_port *port); +extern void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port); + enum team_option_type { TEAM_OPTION_TYPE_U32, TEAM_OPTION_TYPE_STRING, @@ -236,7 +240,6 @@ 
static inline struct team_port *team_get_port_by_index_rcu(struct team *team, return NULL; } -extern int team_port_set_team_dev_addr(struct team_port *port); extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); -- cgit v1.2.3-70-g09d2 From 753f993911b32e479b4fab5d228dc07c11d1e7e7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Mar 2013 01:31:13 +0000 Subject: team: introduce random mode As suggested by Eric Dumazet, allow user to select mode which chooses TX port randomly. Functionality should be more of less similar to round-robin mode with even lower overhead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/Kconfig | 12 ++++++ drivers/net/team/Makefile | 1 + drivers/net/team/team_mode_random.c | 71 +++++++++++++++++++++++++++++++++ drivers/net/team/team_mode_roundrobin.c | 22 +--------- include/linux/if_team.h | 20 ++++++++++ 5 files changed, 105 insertions(+), 21 deletions(-) create mode 100644 drivers/net/team/team_mode_random.c (limited to 'include') diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig index c3011af68e9..c853d84fd99 100644 --- a/drivers/net/team/Kconfig +++ b/drivers/net/team/Kconfig @@ -37,6 +37,18 @@ config NET_TEAM_MODE_ROUNDROBIN To compile this team mode as a module, choose M here: the module will be called team_mode_roundrobin. +config NET_TEAM_MODE_RANDOM + tristate "Random mode support" + depends on NET_TEAM + ---help--- + Basic mode where port used for transmitting packets is selected + randomly. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_random. + config NET_TEAM_MODE_ACTIVEBACKUP tristate "Active-backup mode support" depends on NET_TEAM diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile index 975763014e5..c57e8588975 100644 --- a/drivers/net/team/Makefile +++ b/drivers/net/team/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_NET_TEAM) += team.o obj-$(CONFIG_NET_TEAM_MODE_BROADCAST) += team_mode_broadcast.o obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o +obj-$(CONFIG_NET_TEAM_MODE_RANDOM) += team_mode_random.o obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o obj-$(CONFIG_NET_TEAM_MODE_LOADBALANCE) += team_mode_loadbalance.o diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c new file mode 100644 index 00000000000..9eabfaa22f3 --- /dev/null +++ b/drivers/net/team/team_mode_random.c @@ -0,0 +1,71 @@ +/* + * drivers/net/team/team_mode_random.c - Random mode for team + * Copyright (c) 2013 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +static bool rnd_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *port; + int port_index; + + port_index = random_N(team->en_port_count); + port = team_get_port_by_index_rcu(team, port_index); + port = team_get_first_port_txable_rcu(team, port); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static const struct team_mode_ops rnd_mode_ops = { + .transmit = rnd_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode rnd_mode = { + .kind = "random", + .owner = THIS_MODULE, + .ops = &rnd_mode_ops, +}; + +static int __init rnd_init_module(void) +{ + return team_mode_register(&rnd_mode); +} + +static void __exit rnd_cleanup_module(void) +{ + team_mode_unregister(&rnd_mode); +} + +module_init(rnd_init_module); +module_exit(rnd_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Random mode for team"); +MODULE_ALIAS("team-mode-random"); diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index ed63a6bc66c..d268e4de781 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -25,26 +25,6 @@ static struct rr_priv *rr_priv(struct team *team) return (struct rr_priv *) &team->mode_priv; } -static struct team_port *__get_first_port_up(struct team *team, - struct team_port *port) -{ - struct team_port *cur; - - if (team_port_txable(port)) - return port; - cur = port; - list_for_each_entry_continue_rcu(cur, &team->port_list, list) - if (team_port_txable(port)) - return cur; - list_for_each_entry_rcu(cur, &team->port_list, list) { - if (cur == port) - break; - if (team_port_txable(port)) - return cur; - } - return NULL; -} - static bool rr_transmit(struct team *team, struct sk_buff *skb) { struct team_port *port; @@ -52,7 +32,7 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb) port_index = rr_priv(team)->sent_packets++ % team->en_port_count; port = team_get_port_by_index_rcu(team, port_index); - port = __get_first_port_up(team, port); + port = team_get_first_port_txable_rcu(team, port); if (unlikely(!port)) goto drop; if (team_dev_queue_xmit(team, port, skb)) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 3283def7448..4474557904f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -240,6 +240,26 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, return NULL; } +static inline struct team_port * +team_get_first_port_txable_rcu(struct team *team, struct team_port *port) +{ + struct team_port *cur; + + if (likely(team_port_txable(port))) + return port; + cur = port; + list_for_each_entry_continue_rcu(cur, &team->port_list, list) + if (team_port_txable(port)) + return cur; + list_for_each_entry_rcu(cur, &team->port_list, list) { + if (cur == port) + break; + if (team_port_txable(port)) + return cur; + } + return NULL; +} + extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); -- cgit v1.2.3-70-g09d2 From 6906f4ed6f85b2d72fd944e15da6a905fdde8b40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2013 06:49:21 +0000 Subject: htb: add 
HTB_DIRECT_QLEN attribute HTB uses an internal pfifo queue, which limit is not reported to userland tools (tc), and value inherited from device tx_queue_len at setup time. Introduce TCA_HTB_DIRECT_QLEN attribute to allow finer control. Remove two obsolete pr_err() calls as well. Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_htb.c | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 32aef0a439e..dbd71b0c7d8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -348,6 +348,7 @@ enum { TCA_HTB_INIT, TCA_HTB_CTAB, TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, __TCA_HTB_MAX, }; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 571f1d211f4..79b1876b6cd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, + [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, }; static void htb_work_func(struct work_struct *work) @@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work) static int htb_init(struct Qdisc *sch, struct nlattr *opt) { struct htb_sched *q = qdisc_priv(sch); - struct nlattr *tb[TCA_HTB_INIT + 1]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; int err; int i; @@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); if (err < 0) return err; - if (tb[TCA_HTB_INIT] == NULL) { - pr_err("HTB: hey probably you have bad tc tool ?\n"); + if (!tb[TCA_HTB_INIT]) return -EINVAL; - } + gopt = nla_data(tb[TCA_HTB_INIT]); - if (gopt->version != HTB_VER >> 16) { - pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); + if (gopt->version != HTB_VER >> 16) return -EINVAL; - } err = qdisc_class_hash_init(&q->clhash); if (err < 0) @@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) INIT_WORK(&q->work, htb_work_func); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - + if (tb[TCA_HTB_DIRECT_QLEN]) + q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); + else { + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + q->direct_qlen = 2; + } if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) + if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || + nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = 
tca[TCA_OPTIONS]; - struct nlattr *tb[__TCA_HTB_MAX]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ -- cgit v1.2.3-70-g09d2 From 8524982847ff00b66ffb89314c342c51f4138ee7 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 18 Feb 2013 21:47:45 +0100 Subject: ssb: fix unaligned access to mac address The mac address should be aligned to u16 to prevent an unaligned access in drivers/ssb/pci.c where it is casted to __be16. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- include/linux/ssb/ssb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 22958d68ecf..8b1322296fe 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -26,9 +26,9 @@ struct ssb_sprom_core_pwr_info { struct ssb_sprom { u8 revision; - u8 il0mac[6]; /* MAC address for 802.11b/g */ - u8 et0mac[6]; /* MAC address for Ethernet */ - u8 et1mac[6]; /* MAC address for 802.11a */ + u8 il0mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11b/g */ + u8 et0mac[6] __aligned(sizeof(u16)); /* MAC address for Ethernet */ + u8 et1mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11a */ u8 et0phyaddr; /* MII address for enet0 */ u8 et1phyaddr; /* MII address for enet1 */ u8 et0mdcport; /* MDIO for enet0 */ -- cgit v1.2.3-70-g09d2 From e943789edbb1f9de71b129d9992489eb79ed341f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Feb 2013 21:38:08 +0100 Subject: mac80211: provide ieee80211_sta_eosp() The irqsafe version ieee80211_sta_eosp_irqsafe() exists, but drivers must not mix calls to any irqsafe/non-irqsafe function. Both ath9k and iwlwifi, the likely first users of this interface, use non-irqsafe RX/TX/TX status so must also use a non-irqsafe version of this function. Since no driver uses the _irqsafe() version, remove that. Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 2 +- include/net/mac80211.h | 25 ++++++++++++++----------- net/mac80211/ieee80211_i.h | 5 ----- net/mac80211/main.c | 14 -------------- net/mac80211/sta_info.c | 20 +++----------------- 5 files changed, 18 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 284ced7a228..0f6a3edcd44 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -437,7 +437,7 @@ !Finclude/net/mac80211.h ieee80211_get_buffered_bc !Finclude/net/mac80211.h ieee80211_beacon_get -!Finclude/net/mac80211.h ieee80211_sta_eosp_irqsafe +!Finclude/net/mac80211.h ieee80211_sta_eosp !Finclude/net/mac80211.h ieee80211_frame_release_type !Finclude/net/mac80211.h ieee80211_sta_ps_transition !Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cdd7cea1fd4..8c0ca11a39c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1946,14 +1946,14 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * filter those response frames except in the case of frames that * are buffered in the driver -- those must remain buffered to avoid * reordering. Because it is possible that no frames are released - * in this case, the driver must call ieee80211_sta_eosp_irqsafe() + * in this case, the driver must call ieee80211_sta_eosp() * to indicate to mac80211 that the service period ended anyway. 
* * Finally, if frames from multiple TIDs are released from mac80211 * but the driver might reorder them, it must clear & set the flags * appropriately (only the last frame may have %IEEE80211_TX_STATUS_EOSP) * and also take care of the EOSP and MORE_DATA bits in the frame. - * The driver may also use ieee80211_sta_eosp_irqsafe() in this case. + * The driver may also use ieee80211_sta_eosp() in this case. */ /** @@ -2506,7 +2506,7 @@ enum ieee80211_roc_type { * setting the EOSP flag in the QoS header of the frames. Also, when the * service period ends, the driver must set %IEEE80211_TX_STATUS_EOSP * on the last frame in the SP. Alternatively, it may call the function - * ieee80211_sta_eosp_irqsafe() to inform mac80211 of the end of the SP. + * ieee80211_sta_eosp() to inform mac80211 of the end of the SP. * This callback must be atomic. * @allow_buffered_frames: Prepare device to allow the given number of frames * to go out to the given station. The frames will be sent by mac80211 @@ -2517,7 +2517,7 @@ enum ieee80211_roc_type { * them between the TIDs, it must set the %IEEE80211_TX_STATUS_EOSP flag * on the last frame and clear it on all others and also handle the EOSP * bit in the QoS header correctly. Alternatively, it can also call the - * ieee80211_sta_eosp_irqsafe() function. + * ieee80211_sta_eosp() function. * The @tids parameter is a bitmap and tells the driver which TIDs the * frames will be on; it will at most have two bits set. * This callback must be atomic. @@ -3857,14 +3857,17 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * %IEEE80211_TX_STATUS_EOSP bit and call this function instead. * This applies for PS-Poll as well as uAPSD. * - * Note that there is no non-_irqsafe version right now as - * it wasn't needed, but just like _tx_status() and _rx() - * must not be mixed in irqsafe/non-irqsafe versions, this - * function must not be mixed with those either. Use the - * all irqsafe, or all non-irqsafe, don't mix! If you need - * the non-irqsafe version of this, you need to add it. + * Note that just like with _tx_status() and _rx() drivers must + * not mix calls to irqsafe/non-irqsafe versions, this function + * must not be mixed with those either. Use the all irqsafe, or + * all non-irqsafe, don't mix! + * + * NB: the _irqsafe version of this function doesn't exist, no + * driver needs it right now. Don't call this function if + * you'd need the _irqsafe version, look at the git history + * and restore the _irqsafe version! 
*/ -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta); +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); /** * ieee80211_iter_keys - iterate keys programmed into the device diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4433f081e7..95beb18588f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -800,11 +800,6 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_EOSP_MSG = 3, -}; - -struct skb_eosp_msg_data { - u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5a53aa5ede8..5531c89909d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -226,8 +226,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta, *tmp; - struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -243,18 +241,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(&local->hw, skb); break; - case IEEE80211_EOSP_MSG: - eosp_data = (void *)skb->cb; - for_each_sta_info(local, eosp_data->sta, sta, tmp) { - /* skip wrong virtual interface */ - if (memcmp(eosp_data->iface, - sta->sdata->vif.addr, ETH_ALEN)) - continue; - clear_sta_flag(sta, WLAN_STA_SP); - break; - } - dev_kfree_skb(skb); - break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3644ad79688..852bf45fcfa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1390,30 +1390,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; - struct sk_buff *skb; - struct skb_eosp_msg_data *data; trace_api_eosp(local, pubsta); - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - /* too bad ... but race is better than loss */ - clear_sta_flag(sta, WLAN_STA_SP); - return; - } - - data = (void *)skb->cb; - memcpy(data->sta, pubsta->addr, ETH_ALEN); - memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); - skb->pkt_type = IEEE80211_EOSP_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + clear_sta_flag(sta, WLAN_STA_SP); } -EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); +EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) -- cgit v1.2.3-70-g09d2 From 090096bf3db1c281ddd034573260045888a68fea Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 6 Mar 2013 15:39:42 +0000 Subject: net: generic fdb support for drivers without ndo_fdb_ If the driver does not support the ndo_op use the generic handler for it. This should work in the majority of cases. Eventually the fdb_dflt_add call gets translated into a __dev_set_rx_mode() call which should handle hardware support for filtering via the IFF_UNICAST_FLT flag. Namely IFF_UNICAST_FLT indicates if the hardware can do unicast address filtering. If no support is available the device is put into promisc mode. Signed-off-by: Vlad Yasevich Signed-off-by: John Fastabend Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 9 ++++++ net/core/rtnetlink.c | 81 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 489dd7bb28e..f28544b2f9a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -69,6 +69,15 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int idx); +extern int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags); +extern int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b376410ff25..f95b6fbc29e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2048,6 +2048,38 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + int err = -EINVAL; + + /* If aging addresses are supported device will need to + * implement its own handler for this. + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return err; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -2100,10 +2132,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + if ((ndm->ndm_flags & NTF_SELF)) { + if (dev->netdev_ops->ndo_fdb_add) + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); + else + err = ndo_dflt_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2114,6 +2149,35 @@ out: return err; } +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + int err = -EOPNOTSUPP; + + /* If aging addresses are supported device will need to + * implement its own handler for this. 
+ */ + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -2171,8 +2235,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + if (ndm->ndm_flags & NTF_SELF) { + if (dev->netdev_ops->ndo_fdb_del) + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2257,6 +2324,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (dev->netdev_ops->ndo_fdb_dump) idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + else + ndo_dflt_fdb_dump(skb, cb, dev, idx); } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From b2fb4f54ecd47c42413d54b4666b06cf93c05abf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2013 12:58:01 +0000 Subject: tcp: uninline tcp_prequeue() tcp_prequeue() became too big to be inlined. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 45 +-------------------------------------------- net/ipv4/tcp_ipv4.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index cf0694d4ad6..a2baa5e4ba3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1030,50 +1030,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) #endif } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - * NOTE: is this not too big to inline? 
- */ -static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPPREQUEUEDROPPED); - } - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} - +extern bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec457310..8cdee120a50 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1950,6 +1950,50 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8) --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return false; + + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible_sync_poll(sk_sleep(sk), + POLLIN | POLLRDNORM | POLLRDBAND); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * tcp_rto_min(sk)) / 4, + TCP_RTO_MAX); + } + return true; +} +EXPORT_SYMBOL(tcp_prequeue); + /* * From tcp_input.c */ -- cgit v1.2.3-70-g09d2 From 7f0e44ac9f7f12a2519bfed9ea4df3c1471bd8bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2013 04:20:32 +0000 Subject: ipv6 flowlabel: add __rcu annotations Commit 18367681a10b (ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.) omitted proper __rcu annotations. Signed-off-by: Eric Dumazet Cc: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 8 ++++---- net/ipv6/ip6_flowlabel.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 64d12e77719..988c9f28f0f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -217,7 +217,7 @@ struct ipv6_txoptions { }; struct ip6_flowlabel { - struct ip6_flowlabel *next; + struct ip6_flowlabel __rcu *next; __be32 label; atomic_t users; struct in6_addr dst; @@ -238,9 +238,9 @@ struct ip6_flowlabel { #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) struct ipv6_fl_socklist { - struct ipv6_fl_socklist *next; - struct ip6_flowlabel *fl; - struct rcu_head rcu; + struct ipv6_fl_socklist __rcu *next; + struct ip6_flowlabel *fl; + struct rcu_head rcu; }; extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b973ed3d06c..46e88433ec7 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net) spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; - struct ipv6_fl_socklist *sfl, **sflp; + struct ipv6_fl_socklist *sfl; + struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; -- cgit v1.2.3-70-g09d2 From bed71748346ae0807c7f7a2913965508dbd61403 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 30 Jan 2013 11:50:56 -0300 Subject: Bluetooth: Rename hci_acl_disconn As hci_acl_disconn function basically sends the HCI Disconnect Command and it is used to disconnect ACL, SCO and LE links, renaming it to hci_disconnect is more suitable. 
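The rename is purely cosmetic; for context, the helper's body (shown here approximately, it is not part of this diff) just issues the generic HCI Disconnect command, which is why one function covers every link type:

void hci_disconnect(struct hci_conn *conn, __u8 reason)
{
        struct hci_cp_disconnect cp;

        conn->state = BT_DISCONN;

        /* The Disconnect command only needs a connection handle and a
         * reason code, independent of whether the link is ACL, SCO or LE. */
        cp.handle = cpu_to_le16(conn->handle);
        cp.reason = reason;
        hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
}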
Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90cf75afcb0..787d3b9bbd5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -574,7 +574,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason); +void hci_disconnect(struct hci_conn *conn, __u8 reason); void hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4925a02ae7e..b9f90169940 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -117,7 +117,7 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason) +void hci_disconnect(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -253,7 +253,7 @@ static void hci_conn_disconnect(struct hci_conn *conn) hci_amp_disconn(conn, reason); break; default: - hci_acl_disconn(conn, reason); + hci_disconnect(conn, reason); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 60793e7b768..4cb46c24a74 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2398,7 +2398,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) if (c->type == type && c->sent) { BT_ERR("%s killing stalled connection %pMR", hdev->name, &c->dst); - hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 477726a6351..5892e54835a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2399,7 +2399,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } @@ -3472,7 +3472,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); if (ev->status && conn->state == BT_CONNECTED) { - hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_put(conn); goto unlock; } -- cgit v1.2.3-70-g09d2 From be9f97f04565a6c438b7521ad679870d25645475 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sun, 24 Feb 2013 19:36:52 +0100 Subject: Bluetooth: change bt_sock_unregister() to return void There is no reason a caller ever wants to check the return type of this call. _Iff_ a user successfully called bt_sock_register(), they're allowed to call bt_sock_unregister(). All other calls in the kernel (device_del, device_unregister, kfree(), ..) that are logically equivalent return void. Lets not make callers think they have to check the return type of this call and instead simply return void. We guarantee that after bt_sock_unregister() is called, the socket type _is_ unregistered. If that is not what the caller wants, they're using the wrong function, anyway. 
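A hypothetical protocol module using the new void signature looks like the sketch below; BTPROTO_FOO, foo_sock_create() and the example_* style names are stand-ins for illustration, not real kernel symbols:

static int foo_sock_create(struct net *net, struct socket *sock,
                           int protocol, int kern)
{
        return -ENOSYS;         /* stub for illustration */
}

static const struct net_proto_family foo_sock_family_ops = {
        .family = PF_BLUETOOTH,
        .owner  = THIS_MODULE,
        .create = foo_sock_create,
};

static int __init foo_init(void)
{
        return bt_sock_register(BTPROTO_FOO, &foo_sock_family_ops);
}

static void __exit foo_exit(void)
{
        /* No error to propagate: if registration succeeded,
         * unregistering always succeeds now. */
        bt_sock_unregister(BTPROTO_FOO);
}

module_init(foo_init);
module_exit(foo_exit);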
Signed-off-by: David Herrmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/af_bluetooth.c | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9531beee09b..5f51bef13e6 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -232,7 +232,7 @@ struct bt_sock_list { }; int bt_sock_register(int proto, const struct net_proto_family *ops); -int bt_sock_unregister(int proto); +void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a7..81598e588f7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops) } EXPORT_SYMBOL(bt_sock_register); -int bt_sock_unregister(int proto) +void bt_sock_unregister(int proto) { - int err = 0; - if (proto < 0 || proto >= BT_MAX_PROTO) - return -EINVAL; + return; write_lock(&bt_proto_lock); - - if (!bt_proto[proto]) - err = -ENOENT; - else - bt_proto[proto] = NULL; - + bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); - - return err; } EXPORT_SYMBOL(bt_sock_unregister); -- cgit v1.2.3-70-g09d2 From 8ff52f7d04d9cc31f1e81dcf9a2ba6335ed34905 Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Thu, 28 Feb 2013 14:21:55 +0000 Subject: Bluetooth: Return RFCOMM session ptrs to avoid freed session Unfortunately, the design retains local copies of the s RFCOMM session pointer in various code blocks and this invites the erroneous access to a freed RFCOMM session structure. Therefore, return the RFCOMM session pointer back up the call stack to avoid accessing a freed RFCOMM session structure. When the RFCOMM session is deleted, NULL is passed up the call stack. If active DLCs exist when the rfcomm session is terminating, avoid a memory leak of rfcomm_dlc structures by ensuring that rfcomm_session_close() is used instead of rfcomm_session_del(). 
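The resulting calling convention is worth spelling out: every helper that might free the session hands back the (possibly NULL) session pointer, and the caller must reload its local copy from that return value instead of keeping a stale one. A hypothetical caller (example_rx() is illustrative, not in the patch) follows the same pattern as rfcomm_process_rx() in the hunks below:

static struct rfcomm_session *example_rx(struct rfcomm_session *s,
                                         struct sk_buff *skb)
{
        s = rfcomm_recv_frame(s, skb);  /* may close and free the session */
        if (!s)
                return NULL;            /* never touch the freed structure */

        rfcomm_process_dlcs(s);         /* still safe, session is alive */
        return s;
}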
Signed-off-by: Dean Jenkins Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/rfcomm.h | 3 +- net/bluetooth/rfcomm/core.c | 106 ++++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index e2e3ecad100..a4e38ead228 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -278,7 +278,8 @@ void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, static inline void rfcomm_session_hold(struct rfcomm_session *s) { - atomic_inc(&s->refcnt); + if (s) + atomic_inc(&s->refcnt); } /* ---- RFCOMM sockets ---- */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index d9e97cf1792..2b5c543638b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, u8 sec_level, int *err); static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); -static void rfcomm_session_del(struct rfcomm_session *s); +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) @@ -108,10 +108,12 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static void rfcomm_session_put(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_put(struct rfcomm_session *s) { - if (atomic_dec_and_test(&s->refcnt)) - rfcomm_session_del(s); + if (s && atomic_dec_and_test(&s->refcnt)) + s = rfcomm_session_del(s); + + return s; } /* ---- RFCOMM FCS computation ---- */ @@ -631,7 +633,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) return s; } -static void rfcomm_session_del(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) { int state = s->state; @@ -648,6 +650,8 @@ static void rfcomm_session_del(struct rfcomm_session *s) if (state != BT_LISTEN) module_put(THIS_MODULE); + + return NULL; } static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) @@ -666,7 +670,8 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) return NULL; } -static void rfcomm_session_close(struct rfcomm_session *s, int err) +static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, + int err) { struct rfcomm_dlc *d; struct list_head *p, *n; @@ -685,7 +690,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) } rfcomm_session_clear_timer(s); - rfcomm_session_put(s); + return rfcomm_session_put(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -737,8 +742,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, if (*err == 0 || *err == -EINPROGRESS) return s; - rfcomm_session_del(s); - return NULL; + return rfcomm_session_del(s); failed: sock_release(sock); @@ -1127,7 +1131,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) } /* ---- RFCOMM frame reception ---- */ -static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) { BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); @@ -1136,7 +1140,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci); if (!d) { rfcomm_send_dm(s, dlci); - return 0; + return s; } switch (d->state) 
{ @@ -1172,25 +1176,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - /* rfcomm_session_put is called later so don't do - * anything here otherwise we will mess up the session - * reference counter: - * - * (a) when we are the initiator dlc_unlink will drive - * the reference counter to 0 (there is no initial put - * after session_add) - * - * (b) when we are not the initiator rfcomm_rx_process - * will explicitly call put to balance the initial hold - * done after session add. - */ + s = rfcomm_session_close(s, ECONNRESET); break; } } - return 0; + return s; } -static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) { int err = 0; @@ -1215,12 +1208,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) err = ECONNRESET; s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - return 0; + return s; } -static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s, + u8 dlci) { int err = 0; @@ -1250,10 +1244,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) err = ECONNRESET; s->state = BT_CLOSED; - rfcomm_session_close(s, err); + s = rfcomm_session_close(s, err); } - - return 0; + return s; } void rfcomm_dlc_accept(struct rfcomm_dlc *d) @@ -1674,11 +1667,18 @@ drop: return 0; } -static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) +static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, + struct sk_buff *skb) { struct rfcomm_hdr *hdr = (void *) skb->data; u8 type, dlci, fcs; + if (!s) { + /* no session, so free socket data */ + kfree_skb(skb); + return s; + } + dlci = __get_dlci(hdr->addr); type = __get_type(hdr->ctrl); @@ -1689,7 +1689,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) if (__check_fcs(skb->data, type, fcs)) { BT_ERR("bad checksum in packet"); kfree_skb(skb); - return -EILSEQ; + return s; } if (__test_ea(hdr->len)) @@ -1705,22 +1705,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) case RFCOMM_DISC: if (__test_pf(hdr->ctrl)) - rfcomm_recv_disc(s, dlci); + s = rfcomm_recv_disc(s, dlci); break; case RFCOMM_UA: if (__test_pf(hdr->ctrl)) - rfcomm_recv_ua(s, dlci); + s = rfcomm_recv_ua(s, dlci); break; case RFCOMM_DM: - rfcomm_recv_dm(s, dlci); + s = rfcomm_recv_dm(s, dlci); break; case RFCOMM_UIH: - if (dlci) - return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); - + if (dlci) { + rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); + return s; + } rfcomm_recv_mcc(s, skb); break; @@ -1729,7 +1730,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) break; } kfree_skb(skb); - return 0; + return s; } /* ---- Connection and data processing ---- */ @@ -1866,7 +1867,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s) } } -static void rfcomm_process_rx(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) { struct socket *sock = s->sock; struct sock *sk = sock->sk; @@ -1878,17 +1879,20 @@ static void rfcomm_process_rx(struct rfcomm_session *s) while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); if (!skb_linearize(skb)) - rfcomm_recv_frame(s, skb); + s = rfcomm_recv_frame(s, skb); else kfree_skb(skb); } - if (sk->sk_state == BT_CLOSED) { + if (s && (sk->sk_state == BT_CLOSED)) { if (!s->initiator) - 
rfcomm_session_put(s); + s = rfcomm_session_put(s); - rfcomm_session_close(s, sk->sk_err); + if (s) + s = rfcomm_session_close(s, sk->sk_err); } + + return s; } static void rfcomm_accept_connection(struct rfcomm_session *s) @@ -1925,7 +1929,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) sock_release(nsock); } -static void rfcomm_check_connection(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) { struct sock *sk = s->sock->sk; @@ -1944,9 +1948,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s) case BT_CLOSED: s->state = BT_CLOSED; - rfcomm_session_close(s, sk->sk_err); + s = rfcomm_session_close(s, sk->sk_err); break; } + return s; } static void rfcomm_process_sessions(void) @@ -1975,15 +1980,16 @@ static void rfcomm_process_sessions(void) switch (s->state) { case BT_BOUND: - rfcomm_check_connection(s); + s = rfcomm_check_connection(s); break; default: - rfcomm_process_rx(s); + s = rfcomm_process_rx(s); break; } - rfcomm_process_dlcs(s); + if (s) + rfcomm_process_dlcs(s); rfcomm_session_put(s); } -- cgit v1.2.3-70-g09d2 From 08c30aca9e698faddebd34f81e1196295f9dc063 Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Thu, 28 Feb 2013 14:21:56 +0000 Subject: Bluetooth: Remove RFCOMM session refcnt Previous commits have improved the handling of the RFCOMM session timer and the RFCOMM session pointers such that freed RFCOMM session structures should no longer be erroneously accessed. The RFCOMM session refcnt now has no purpose and will be deleted by this commit. Note that the RFCOMM session is now deleted as soon as the RFCOMM control channel link is no longer required. This makes the lifetime of the RFCOMM session deterministic and absolute. Previously with the refcnt, there was uncertainty about when the session structure would be deleted because the relative refcnt prevented the session structure from being deleted at will. It was noted that the refcnt could malfunction under very heavy real-time processor loading in embedded SMP environments. This could cause premature RFCOMM session deletion or double session deletion that could result in kernel crashes. Removal of the refcnt prevents this issue. There are 4 connection / disconnection RFCOMM session scenarios: host initiated control link ---> host disconnected control link host initiated ctrl link ---> remote device disconnected ctrl link remote device initiated ctrl link ---> host disconnected ctrl link remote device initiated ctrl link ---> remote device disc'ed ctrl link The control channel connection procedures are independent of the disconnection procedures. Strangely, the RFCOMM session refcnt was applying special treatment so erroneously combining connection and disconnection events. This commit fixes this issue by removing some session code that used the "initiator" member of the session structure that was intended for use with the data channels. 
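A condensed sketch of the lifetime model that results (illustrative only, the real code is in the hunks below): the session exists exactly as long as the control channel is needed, and the idle timer, not a reference count, decides when an unused control link is torn down.

static void example_dlc_unlink(struct rfcomm_dlc *d)
{
        struct rfcomm_session *s = d->session;

        list_del(&d->list);
        d->session = NULL;
        rfcomm_dlc_put(d);

        /* Last DLC gone: arm the idle timer.  When it fires the control
         * channel is disconnected and rfcomm_session_del() frees the
         * session, with no refcnt left to balance. */
        if (list_empty(&s->dlcs))
                rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT);
}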
Signed-off-by: Dean Jenkins Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/rfcomm.h | 7 ------- net/bluetooth/rfcomm/core.c | 43 +++++------------------------------------- 2 files changed, 5 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a4e38ead228..7afd4199d6b 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -158,7 +158,6 @@ struct rfcomm_session { struct timer_list timer; unsigned long state; unsigned long flags; - atomic_t refcnt; int initiator; /* Default DLC parameters */ @@ -276,12 +275,6 @@ static inline void rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, bdaddr_t *dst); -static inline void rfcomm_session_hold(struct rfcomm_session *s) -{ - if (s) - atomic_inc(&s->refcnt); -} - /* ---- RFCOMM sockets ---- */ struct sockaddr_rc { sa_family_t rc_family; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b5c543638b..75b7bbd8acb 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -108,14 +108,6 @@ static void rfcomm_schedule(void) wake_up_process(rfcomm_thread); } -static struct rfcomm_session *rfcomm_session_put(struct rfcomm_session *s) -{ - if (s && atomic_dec_and_test(&s->refcnt)) - s = rfcomm_session_del(s); - - return s; -} - /* ---- RFCOMM FCS computation ---- */ /* reversed, 8-bit, poly=0x07 */ @@ -251,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) { BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); - if (!mod_timer(&s->timer, jiffies + timeout)) - rfcomm_session_hold(s); + mod_timer(&s->timer, jiffies + timeout); } static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (del_timer_sync(&s->timer)) - rfcomm_session_put(s); + del_timer_sync(&s->timer); } /* ---- RFCOMM DLCs ---- */ @@ -338,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d) { BT_DBG("dlc %p session %p", d, s); - rfcomm_session_hold(s); - rfcomm_session_clear_timer(s); rfcomm_dlc_hold(d); list_add(&d->list, &s->dlcs); @@ -358,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) if (list_empty(&s->dlcs)) rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); - - rfcomm_session_put(s); } static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) @@ -678,8 +664,6 @@ static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, BT_DBG("session %p state %ld err %d", s, s->state, err); - rfcomm_session_hold(s); - s->state = BT_CLOSED; /* Close all dlcs */ @@ -690,7 +674,7 @@ static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, } rfcomm_session_clear_timer(s); - return rfcomm_session_put(s); + return rfcomm_session_del(s); } static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -1884,13 +1868,8 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) kfree_skb(skb); } - if (s && (sk->sk_state == BT_CLOSED)) { - if (!s->initiator) - s = rfcomm_session_put(s); - - if (s) - s = rfcomm_session_close(s, sk->sk_err); - } + if (s && (sk->sk_state == BT_CLOSED)) + s = rfcomm_session_close(s, sk->sk_err); return s; } @@ -1917,8 +1896,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s) s = rfcomm_session_add(nsock, BT_OPEN); if (s) { - rfcomm_session_hold(s); - /* We should adjust MTU on 
incoming sessions. * L2CAP MTU minus UIH header and FCS. */ s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, @@ -1967,7 +1944,6 @@ static void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); - rfcomm_session_put(s); continue; } @@ -1976,8 +1952,6 @@ static void rfcomm_process_sessions(void) continue; } - rfcomm_session_hold(s); - switch (s->state) { case BT_BOUND: s = rfcomm_check_connection(s); @@ -1990,8 +1964,6 @@ static void rfcomm_process_sessions(void) if (s) rfcomm_process_dlcs(s); - - rfcomm_session_put(s); } rfcomm_unlock(); @@ -2041,7 +2013,6 @@ static int rfcomm_add_listener(bdaddr_t *ba) if (!s) goto failed; - rfcomm_session_hold(s); return 0; failed: sock_release(sock); @@ -2099,8 +2070,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - rfcomm_session_hold(s); - list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); @@ -2132,8 +2101,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) set_bit(RFCOMM_AUTH_REJECT, &d->flags); } - rfcomm_session_put(s); - rfcomm_schedule(); } -- cgit v1.2.3-70-g09d2 From 3119ae9599e5cdc1b9838563905c500b582ab6a5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:44 +0200 Subject: Bluetooth: Add initial skeleton for asynchronous HCI requests This patch adds the initial definitions and functions for asynchronous HCI requests. Asynchronous requests are essentially a group of HCI commands together with an optional completion callback. The request is tracked through the already existing command queue by having the necessary context information as part of the control buffer of each skb. The only information needed in the skb control buffer is a flag for indicating that the skb is the start of a request as well as the optional complete callback that should be used when the request is complete (this will be found in the last skb of the request). 
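Put differently, no new bookkeeping object is allocated per request: everything lives in the control buffer of the queued command skbs. A throwaway debugging helper (example_dump_requests() is illustrative only; a real caller would hold hdev->cmd_q.lock) shows how the request boundaries can be read back from the shared queue:

static void example_dump_requests(struct hci_dev *hdev)
{
        struct sk_buff *skb;

        skb_queue_walk(&hdev->cmd_q, skb) {
                /* req.start marks the first command of a request; the
                 * optional complete callback sits on its last command. */
                BT_DBG("skb %p start %d complete %p", skb,
                       bt_cb(skb)->req.start, bt_cb(skb)->req.complete);
        }
}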
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 10 ++++++++++ include/net/bluetooth/hci_core.h | 8 ++++++++ net/bluetooth/hci_core.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 5f51bef13e6..ed6e9552252 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -260,12 +260,22 @@ struct l2cap_ctrl { __u8 retries; }; +struct hci_dev; + +typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); + +struct hci_req_ctrl { + bool start; + hci_req_complete_t complete; +}; + struct bt_skb_cb { __u8 pkt_type; __u8 incoming; __u16 expect; __u8 force_active; struct l2cap_ctrl control; + struct hci_req_ctrl req; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 787d3b9bbd5..7191217c6bd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1041,6 +1041,14 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); + int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6ab38fecf1f..94b08aa9a08 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2439,6 +2439,36 @@ static int hci_send_frame(struct sk_buff *skb) return hdev->send(skb); } +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -EINVAL; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) { -- cgit v1.2.3-70-g09d2 From 71c76a170e979d60e01bd093c9b79e3adeb710cc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:46 +0200 Subject: Bluetooth: Introduce new hci_req_add function This function is analogous to hci_send_cmd() but instead of directly queuing the command to hdev->cmd_q it adds it to the local queue of the asynchronous HCI request being build (inside struct hci_request). This is the main function used for building asynchronous requests and there should be one or more calls to it between calls to hci_req_init and hci_req_run. 
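A minimal usage sketch follows; the callback and the particular command are made up for illustration, only the hci_req_init()/hci_req_add()/hci_req_run() sequence is the point:

static void example_complete(struct hci_dev *hdev, u8 status)
{
        BT_DBG("%s request finished, status 0x%2.2x", hdev->name, status);
}

static int example_enable_scan(struct hci_dev *hdev)
{
        struct hci_request req;
        __u8 scan = SCAN_PAGE | SCAN_INQUIRY;

        hci_req_init(&req, hdev);
        hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);

        /* Commands are spliced onto hdev->cmd_q only here, so the whole
         * request enters the queue atomically. */
        return hci_req_run(&req, example_complete);
}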
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7191217c6bd..67fe661259b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1048,6 +1048,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d2edcc4643c..6e6a9dd8a15 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2517,6 +2517,28 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return 0; } +/* Queue a command to an asynchronous HCI request */ +int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command", hdev->name); + return -ENOMEM; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&req->cmd_q, skb); + + return 0; +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { -- cgit v1.2.3-70-g09d2 From 9238f36a5a5097018b90baa42c473d2f916a46f5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:48 +0200 Subject: Bluetooth: Add request cmd_complete and cmd_status functions This patch introduces functions to process the HCI request state when receiving HCI Command Status or Command Complete events. Some HCI commands, like Inquiry do not result in a Command complete event so special handling is needed for them. Inquiry is a particularly important one since it is the only forseeable "non-cmd_complete" command that will make good use of the request functionality, and its completion is either indicated by an Inquiry Complete event of a successful Command Complete for HCI_Inquiry_Cancel. 
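The Inquiry case mentioned above ends up looking roughly like the following in the event handlers (reduced from the hci_event.c hunks further down; the function name here is illustrative):

static void example_inquiry_complete_evt(struct hci_dev *hdev, u8 status)
{
        /* Inquiry never gets its own Command Complete, so the request is
         * finished either here (Inquiry Complete event) or when the
         * Command Complete for HCI_Inquiry_Cancel arrives; both paths
         * report completion against the HCI_OP_INQUIRY opcode. */
        hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status);
}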
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_core.c | 85 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_event.c | 7 ++++ 3 files changed, 94 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 67fe661259b..d732d6894ad 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1049,6 +1049,8 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); +void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f8142bdf65..0ada2ec36e7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3208,6 +3208,91 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } +static bool hci_req_is_complete(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + skb = skb_peek(&hdev->cmd_q); + if (!skb) + return true; + + return bt_cb(skb)->req.start; +} + +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + /* Check that the completed command really matches the last one + * that was sent. + */ + if (!hci_sent_cmd_data(hdev, opcode)) + return; + + /* If the command succeeded and there's still more commands in + * this request the request is not yet complete. + */ + if (!status && !hci_req_is_complete(hdev)) + return; + + /* If this was the last command in a request the complete + * callback would be found in hdev->sent_cmd instead of the + * command queue (hdev->cmd_q). + */ + if (hdev->sent_cmd) { + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (req_complete) + goto call_complete; + } + + /* Remove all pending commands belonging to this request */ + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + while ((skb = __skb_dequeue(&hdev->cmd_q))) { + if (bt_cb(skb)->req.start) { + __skb_queue_head(&hdev->cmd_q, skb); + break; + } + + req_complete = bt_cb(skb)->req.complete; + kfree_skb(skb); + } + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +call_complete: + if (req_complete) + req_complete(hdev, status); +} + +void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status) +{ + hci_req_complete_t req_complete = NULL; + + BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + + if (status) { + hci_req_cmd_complete(hdev, opcode, status); + return; + } + + /* No need to handle success status if there are more commands */ + if (!hci_req_is_complete(hdev)) + return; + + if (hdev->sent_cmd) + req_complete = bt_cb(hdev->sent_cmd)->req.complete; + + /* If the request doesn't have a complete callback or there + * are other commands/requests in the hdev queue we consider + * this request as completed. 
+ */ + if (!req_complete || !skb_queue_empty(&hdev->cmd_q)) + hci_req_cmd_complete(hdev, opcode, status); +} + static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 14e872aa0d2..8b878a3bdf6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -53,6 +53,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); @@ -1692,6 +1693,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); + hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); @@ -2254,6 +2256,7 @@ static void hci_qos_setup_complete_evt(struct hci_dev *hdev, static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_cmd_complete *ev = (void *) skb->data; + u8 status = skb->data[sizeof(*ev)]; __u16 opcode; skb_pull(skb, sizeof(*ev)); @@ -2497,6 +2500,8 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_complete(hdev, ev->opcode, status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -2590,6 +2595,8 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); + hci_req_cmd_status(hdev, ev->opcode, ev->status); + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) -- cgit v1.2.3-70-g09d2 From 42c6b129cd8c2aa5012a78ec39672e7052cc677a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:49 +0200 Subject: Bluetooth: Use async requests internally in hci_req_sync This patch converts the hci_req_sync() procedure to internaly use the asynchronous HCI requests. The hci_req_sync mechanism relies on hci_req_complete() calls from hci_event.c into hci_core.c whenever a HCI command completes. This is very similar to what asynchronous requests do and makes the conversion fairly straight forward by converting hci_req_complete into a request complete callback. By this change hci_req_complete (renamed to hci_req_sync_complete) becomes private to hci_core.c and all calls to it can be removed from hci_event.c. The commands in each hci_req_sync procedure are collected into their own request by passing the hci_request pointer to the request callback (instead of the hci_dev pointer). The one slight exception is the HCI init request which has the special handling of HCI driver specific initialization commands. These commands are run in their own request prior to the "main" init request. One other extra change that this patch must contain is the handling of spontaneous HCI reset complete events that some controllers exhibit. These were previously handled in the hci_req_complete function but the right place for them now becomes the hci_req_cmd_complete function. 
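The shape of a converted request builder and its synchronous caller, simplified to the bare pattern (the example_* names are illustrative; both functions would live in hci_core.c since hci_req_sync() is static there):

static void example_auth_req(struct hci_request *req, unsigned long opt)
{
        __u8 auth = opt;

        /* Builders only queue commands; they no longer send anything. */
        hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
}

static int example_set_auth(struct hci_dev *hdev, __u8 enable)
{
        /* Runs the request asynchronously and sleeps until
         * hci_req_sync_complete() reports the final status. */
        return hci_req_sync(hdev, example_auth_req, enable, HCI_INIT_TIMEOUT);
}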
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 271 ++++++++++++++++++++++----------------- net/bluetooth/hci_event.c | 78 +---------- 3 files changed, 156 insertions(+), 195 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d732d6894ad..3f124f43fcb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1164,8 +1164,6 @@ struct hci_sec_filter { #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result); - void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ada2ec36e7..6218eced153 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) { - BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); - - /* If this is the init phase check if the completed command matches - * the last init command, and if not just return. - */ - if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); - struct sk_buff *skb; - - /* Some CSR based controllers generate a spontaneous - * reset complete event during init and any pending - * command will never be completed. In such a case we - * need to resend whatever was the last sent - * command. - */ - - if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET) - return; - - skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC); - if (skb) { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } - - return; - } + BT_DBG("%s result 0x%2.2x", hdev->name, result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -108,26 +81,36 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) /* Execute request and wait for completion. */ static int __hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), + void (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, __u32 timeout) { + struct hci_request req; DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("%s start", hdev->name); + hci_req_init(&req, hdev); + hdev->req_status = HCI_REQ_PEND; add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); - req(hdev, opt); + func(&req, opt); - /* If the request didn't send any commands return immediately */ - if (skb_queue_empty(&hdev->cmd_q) && atomic_read(&hdev->cmd_cnt)) { + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) { hdev->req_status = 0; remove_wait_queue(&hdev->req_wait_q, &wait); - return err; + /* req_run will fail if the request did not add any + * commands to the queue, something that can happen when + * a request with conditionals doesn't trigger any + * commands to be sent. This is normal behavior and + * should not trigger an error return. 
+ */ + return 0; } schedule_timeout(timeout); @@ -159,7 +142,8 @@ static int __hci_req_sync(struct hci_dev *hdev, } static int hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_dev *hdev, unsigned long opt), + void (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, __u32 timeout) { int ret; @@ -175,72 +159,80 @@ static int hci_req_sync(struct hci_dev *hdev, return ret; } -static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) +static void hci_reset_req(struct hci_request *req, unsigned long opt) { - BT_DBG("%s %ld", hdev->name, opt); + BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ - set_bit(HCI_RESET, &hdev->flags); - hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + set_bit(HCI_RESET, &req->hdev->flags); + hci_req_add(req, HCI_OP_RESET, 0, NULL); } -static void bredr_init(struct hci_dev *hdev) +static void bredr_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; /* Read Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read BD Address */ - hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); } -static void amp_init(struct hci_dev *hdev) +static void amp_init(struct hci_request *req) { - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; /* Read Local Version */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); /* Read Local AMP Info */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); /* Read Data Blk size */ - hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } -static void hci_init1_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init1_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + struct hci_request init_req; struct sk_buff *skb; BT_DBG("%s %ld", hdev->name, opt); /* Driver initialization */ + hci_req_init(&init_req, hdev); + /* Special commands */ while ((skb = skb_dequeue(&hdev->driver_init))) { bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; - skb_queue_tail(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); + if (skb_queue_empty(&init_req.cmd_q)) + bt_cb(skb)->req.start = true; + + skb_queue_tail(&init_req.cmd_q, skb); } skb_queue_purge(&hdev->driver_init); + hci_req_run(&init_req, NULL); + /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) - hci_reset_req(hdev, 0); + hci_reset_req(req, 0); switch (hdev->dev_type) { case HCI_BREDR: - bredr_init(hdev); + bredr_init(req); break; case HCI_AMP: - amp_init(hdev); + amp_init(req); break; default: @@ -249,53 +241,53 @@ static void hci_init1_req(struct hci_dev *hdev, unsigned long opt) } } -static void bredr_setup(struct hci_dev *hdev) +static void bredr_setup(struct hci_request *req) { struct hci_cp_delete_stored_link_key cp; __le16 param; __u8 flt_type; /* Read Buffer Size (ACL mtu, max pkt, etc.) 
*/ - hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); /* Read Class of Device */ - hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); /* Read Local Name */ - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); /* Read Voice Setting */ - hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); + hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); /* Clear Event Filters */ flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); /* Connection accept timeout ~20 secs */ param = __constant_cpu_to_le16(0x7d00); - hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); + hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; - hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); } -static void le_setup(struct hci_dev *hdev) +static void le_setup(struct hci_request *req) { /* Read LE Buffer Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); /* Read LE Local Supported Features */ - hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); /* Read LE Advertising Channel TX Power */ - hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); /* Read LE White List Size */ - hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); /* Read LE Supported States */ - hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); + hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); } static u8 hci_get_inquiry_mode(struct hci_dev *hdev) @@ -326,17 +318,19 @@ static u8 hci_get_inquiry_mode(struct hci_dev *hdev) return 0x00; } -static void hci_setup_inquiry_mode(struct hci_dev *hdev) +static void hci_setup_inquiry_mode(struct hci_request *req) { u8 mode; - mode = hci_get_inquiry_mode(hdev); + mode = hci_get_inquiry_mode(req->hdev); - hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); } -static void hci_setup_event_mask(struct hci_dev *hdev) +static void hci_setup_event_mask(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; + /* The second byte is 0xff instead of 0x9f (two reserved bits * disabled) since a Broadcom 1.2 dongle doesn't respond to the * command otherwise. 
@@ -392,67 +386,70 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ - hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); + hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); if (lmp_le_capable(hdev)) { memset(events, 0, sizeof(events)); events[0] = 0x1f; - hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK, - sizeof(events), events); + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, + sizeof(events), events); } } -static void hci_init2_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init2_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + if (lmp_bredr_capable(hdev)) - bredr_setup(hdev); + bredr_setup(req); if (lmp_le_capable(hdev)) - le_setup(hdev); + le_setup(req); - hci_setup_event_mask(hdev); + hci_setup_event_mask(req); if (hdev->hci_ver > BLUETOOTH_VER_1_1) - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; - hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, - sizeof(mode), &mode); + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, + sizeof(mode), &mode); } else { struct hci_cp_write_eir cp; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); - hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } } if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(hdev); + hci_setup_inquiry_mode(req); if (lmp_inq_tx_pwr_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); if (lmp_ext_feat_capable(hdev)) { struct hci_cp_read_local_ext_features cp; cp.page = 0x01; - hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), - &cp); + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); } if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { u8 enable = 1; - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), - &enable); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), + &enable); } } -static void hci_setup_link_policy(struct hci_dev *hdev) +static void hci_setup_link_policy(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; @@ -466,11 +463,12 @@ static void hci_setup_link_policy(struct hci_dev *hdev) link_policy |= HCI_LP_PARK; cp.policy = cpu_to_le16(link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); } -static void hci_set_le_support(struct hci_dev *hdev) +static void hci_set_le_support(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_write_le_host_supported cp; memset(&cp, 0, sizeof(cp)); @@ -481,17 +479,19 @@ static void hci_set_le_support(struct hci_dev *hdev) } if (cp.le != lmp_host_le_capable(hdev)) - hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), - &cp); + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), + &cp); } -static void hci_init3_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init3_req(struct hci_request *req, unsigned long opt) { + struct hci_dev *hdev = req->hdev; + if (hdev->commands[5] & 0x10) - hci_setup_link_policy(hdev); + hci_setup_link_policy(req); if (lmp_le_capable(hdev)) - hci_set_le_support(hdev); + hci_set_le_support(req); } static int __hci_init(struct 
hci_dev *hdev) @@ -516,44 +516,44 @@ static int __hci_init(struct hci_dev *hdev) return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); } -static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) +static void hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; - BT_DBG("%s %x", hdev->name, scan); + BT_DBG("%s %x", req->hdev->name, scan); /* Inquiry and Page scans */ - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) +static void hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; - BT_DBG("%s %x", hdev->name, auth); + BT_DBG("%s %x", req->hdev->name, auth); /* Authentication */ - hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); } -static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) +static void hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; - BT_DBG("%s %x", hdev->name, encrypt); + BT_DBG("%s %x", req->hdev->name, encrypt); /* Encryption */ - hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } -static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +static void hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, policy); + BT_DBG("%s %x", req->hdev->name, policy); /* Default link policy */ - hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); } /* Get HCI device by index. @@ -790,9 +790,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) +static void hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; + struct hci_dev *hdev = req->hdev; struct hci_cp_inquiry cp; BT_DBG("%s", hdev->name); @@ -804,7 +805,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; - hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); } int hci_inquiry(void __user *arg) @@ -1845,7 +1846,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_param_req(struct hci_request *req, unsigned long opt) { struct le_scan_params *param = (struct le_scan_params *) opt; struct hci_cp_le_set_scan_param cp; @@ -1855,10 +1856,10 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) cp.interval = cpu_to_le16(param->interval); cp.window = cpu_to_le16(param->window); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); } -static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_enable_req(struct hci_request *req, unsigned long opt) { struct hci_cp_le_set_scan_enable cp; @@ -1866,7 +1867,7 @@ static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) cp.enable = 1; cp.filter_dup = 1; - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_req_add(req, 
HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, @@ -3219,6 +3220,28 @@ static bool hci_req_is_complete(struct hci_dev *hdev) return bt_cb(skb)->req.start; } +static void hci_resend_last(struct hci_dev *hdev) +{ + struct hci_command_hdr *sent; + struct sk_buff *skb; + u16 opcode; + + if (!hdev->sent_cmd) + return; + + sent = (void *) hdev->sent_cmd->data; + opcode = __le16_to_cpu(sent->opcode); + if (opcode == HCI_OP_RESET) + return; + + skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + if (!skb) + return; + + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); +} + void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) { hci_req_complete_t req_complete = NULL; @@ -3227,11 +3250,21 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); - /* Check that the completed command really matches the last one - * that was sent. + /* If the completed command doesn't match the last one that was + * sent we need to do special handling of it. */ - if (!hci_sent_cmd_data(hdev, opcode)) + if (!hci_sent_cmd_data(hdev, opcode)) { + /* Some CSR based controllers generate a spontaneous + * reset complete event during init and any pending + * command will never be completed. In such a case we + * need to resend whatever was the last sent + * command. + */ + if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) + hci_resend_last(hdev); + return; + } /* If the command succeeded and there's still more commands in * this request the request is not yet complete. diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8b878a3bdf6..0dd85a0c05f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -54,7 +54,6 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); - hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); } @@ -184,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, if (!status) hdev->link_policy = get_unaligned_le16(sent); - - hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -196,8 +193,6 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); - hci_req_complete(hdev, HCI_OP_RESET, status); - /* Reset all non-persistent flags */ hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | BIT(HCI_PERIODIC_INQ)); @@ -232,8 +227,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (!status && !test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -271,8 +264,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_auth_enable_complete(hdev, status); - - hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -294,8 +285,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) else clear_bit(HCI_ENCRYPT, &hdev->flags); } - - hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void 
hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -344,7 +333,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) done: hci_dev_unlock(hdev); - hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -441,8 +429,6 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); } static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -480,7 +466,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); @@ -490,9 +476,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status); } static void hci_cc_read_local_commands(struct hci_dev *hdev, @@ -504,8 +487,6 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - - hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); } static void hci_cc_read_local_features(struct hci_dev *hdev, @@ -572,7 +553,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); if (rp->status) - goto done; + return; switch (rp->page) { case 0: @@ -582,9 +563,6 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, memcpy(hdev->host_features, rp->features, 8); break; } - -done: - hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -594,12 +572,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (rp->status) - return; - - hdev->flow_ctl_mode = rp->mode; - - hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); + if (!rp->status) + hdev->flow_ctl_mode = rp->mode; } static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -636,8 +610,6 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); - - hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); } static void hci_cc_read_data_block_size(struct hci_dev *hdev, @@ -658,8 +630,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev, BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, hdev->block_cnt, hdev->block_len); - - hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); } static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) @@ -667,8 +637,6 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static void hci_cc_read_local_amp_info(struct hci_dev *hdev, @@ -692,8 +660,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev, hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); hdev->amp_max_flush_to = 
__le32_to_cpu(rp->max_flush_to); - hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); - a2mp_rsp: a2mp_send_getinfo_rsp(hdev); } @@ -741,8 +707,6 @@ static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); } static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) @@ -750,8 +714,6 @@ static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); } static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, @@ -760,8 +722,6 @@ static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); } static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, @@ -773,8 +733,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, if (!rp->status) hdev->inq_tx_power = rp->tx_power; - - hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status); } static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) @@ -782,8 +740,6 @@ static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); } static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -845,8 +801,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hdev->le_cnt = hdev->le_pkts; BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - - hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } static void hci_cc_le_read_local_features(struct hci_dev *hdev, @@ -858,8 +812,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_features, rp->features, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); } static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, @@ -874,8 +826,6 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, if (!test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); } - - hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status); } static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) @@ -883,8 +833,6 @@ static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); - - hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status); } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -985,8 +933,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!test_bit(HCI_INIT, &hdev->flags)) hci_update_ad(hdev); - - hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -995,8 +941,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1019,8 +963,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, 
switch (cp->enable) { case LE_SCANNING_ENABLED: - hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status); - if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -1071,8 +1013,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, if (!rp->status) hdev->le_white_list_size = rp->size; - - hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); } static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1083,8 +1023,6 @@ static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status); } static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1095,8 +1033,6 @@ static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - - hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1108,8 +1044,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, if (!rp->status) memcpy(hdev->le_states, rp->le_states, 8); - - hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); } static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1139,8 +1073,6 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (test_bit(HCI_MGMT, &hdev->dev_flags) && !test_bit(HCI_INIT, &hdev->flags)) mgmt_le_enable_complete(hdev, sent->le, status); - - hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status); } static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, @@ -1162,7 +1094,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%2.2x", hdev->name, status); if (status) { - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); hci_dev_lock(hdev); if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1694,7 +1625,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status); - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); -- cgit v1.2.3-70-g09d2 From cecbb967b2f5c52e090978ff6afe7deddbfbeda5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Mar 2013 20:37:50 +0200 Subject: Bluetooth: Remove unused hdev->init_last_cmd This variable is no longer needed (due to async HCI request support and the conversion of hci_req_sync to use it), so it can be safely removed. 
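To make the change concrete, here is a minimal sketch of the asynchronous request pattern that replaces the old per-command tracking: commands are queued on a struct hci_request and submitted as one unit, so nothing like init_last_cmd is needed to remember which command was sent last. The function names (example_complete, example_init_req, example_run_init) are hypothetical, and the hci_req_complete_t prototype used for example_complete() is assumed; the hci_req_* calls follow the signatures shown in the surrounding diffs.

static void example_complete(struct hci_dev *hdev, u8 status)
{
        BT_DBG("%s status 0x%2.2x", hdev->name, status);
}

static void example_init_req(struct hci_request *req, unsigned long opt)
{
        __u8 scan = opt;        /* e.g. 0x03 = inquiry + page scan */
        __u8 auth = 0x01;       /* enable authentication */

        /* Parameters are copied into the command skb, so stack locals
         * are fine here.
         */
        hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
        hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
}

static int example_run_init(struct hci_dev *hdev)
{
        struct hci_request req;

        hci_req_init(&req, hdev);
        example_init_req(&req, 0x03);

        /* example_complete() runs once the last queued command completes. */
        return hci_req_run(&req, example_complete);
}

The first command queued by hci_req_add() is flagged as the start of the request, which is how the core later recognises that the whole sequence has completed.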
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_core.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3f124f43fcb..3a9cbf2b55c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -248,8 +248,6 @@ struct hci_dev { __u32 req_status; __u32 req_result; - __u16 init_last_cmd; - struct list_head mgmt_pending; struct discovery_state discovery; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6218eced153..3fc699db8fb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1015,10 +1015,7 @@ int hci_dev_open(__u16 dev) if (!test_bit(HCI_RAW, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - hdev->init_last_cmd = 0; - ret = __hci_init(hdev); - clear_bit(HCI_INIT, &hdev->flags); } @@ -2509,9 +2506,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) return -ENOMEM; } - if (test_bit(HCI_INIT, &hdev->flags)) - hdev->init_last_cmd = opcode; - /* Stand-alone HCI commands must be flaged as * single-command requests. */ -- cgit v1.2.3-70-g09d2 From c031e234ee304b507b79f76a7677ea0a7a8890e8 Mon Sep 17 00:00:00 2001 From: Andy King Date: Thu, 7 Mar 2013 05:26:13 +0000 Subject: VSOCK: Split vm_sockets.h into kernel/uapi Split the vSockets header into kernel and UAPI parts. The former gets the bits that used to be in __KERNEL__ guards, while the latter gets everything that is user-visible. Tested by compiling vsock (+transport) and a simple user-mode vSockets application. Reported-by: David Howells Acked-by: Dmitry Torokhov Signed-off-by: Andy King Acked-by: David Howells Signed-off-by: David S. Miller --- include/linux/vm_sockets.h | 23 +++++++++++++++++++++++ include/uapi/linux/vm_sockets.h | 23 ++++++++--------------- 2 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 include/linux/vm_sockets.h (limited to 'include') diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h new file mode 100644 index 00000000000..0805eecba8f --- /dev/null +++ b/include/linux/vm_sockets.h @@ -0,0 +1,23 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H +#define _VM_SOCKETS_H + +#include + +int vm_sockets_get_local_cid(void); + +#endif /* _VM_SOCKETS_H */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index df91301847e..b4ed5d89569 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -13,12 +13,10 @@ * more details. */ -#ifndef _VM_SOCKETS_H_ -#define _VM_SOCKETS_H_ +#ifndef _UAPI_VM_SOCKETS_H +#define _UAPI_VM_SOCKETS_H -#if !defined(__KERNEL__) -#include -#endif +#include /* Option name for STREAM socket buffer size. 
Use as the option name in * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that @@ -137,14 +135,13 @@ #define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) /* Address structure for vSockets. The address family should be set to - * whatever vmci_sock_get_af_value_fd() returns. The structure members should - * all align on their natural boundaries without resorting to compiler packing - * directives. The total size of this structure should be exactly the same as - * that of struct sockaddr. + * AF_VSOCK. The structure members should all align on their natural + * boundaries without resorting to compiler packing directives. The total size + * of this structure should be exactly the same as that of struct sockaddr. */ struct sockaddr_vm { - sa_family_t svm_family; + __kernel_sa_family_t svm_family; unsigned short svm_reserved1; unsigned int svm_port; unsigned int svm_cid; @@ -156,8 +153,4 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) -#if defined(__KERNEL__) -int vm_sockets_get_local_cid(void); -#endif - -#endif +#endif /* _UAPI_VM_SOCKETS_H */ -- cgit v1.2.3-70-g09d2 From b7ef213ef65256168df83ddfbb8131ed9adc10f9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 8 Mar 2013 02:07:16 +0000 Subject: ipv6: introdcue __ipv6_addr_needs_scope_id and ipv6_iface_scope_id helper functions __ipv6_addr_needs_scope_id checks if an ipv6 address needs to supply a 'sin6_scope_id != 0'. 'sin6_scope_id != 0' was enforced in case of link-local addresses. To support interface-local multicast these checks had to be enhanced and are now consolidated into these new helper functions. v2: a) migrated to struct ipv6_addr_props v3: a) reverted changes for ipv6_addr_props b) test for address type instead of comparing scope v4: a) unchanged Suggested-by: YOSHIFUJI Hideaki Cc: YOSHIFUJI Hideaki Acked-by: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 988c9f28f0f..42ef6abf25c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -320,6 +320,18 @@ static inline int ipv6_addr_src_scope(const struct in6_addr *addr) return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } +static inline bool __ipv6_addr_needs_scope_id(int type) +{ + return type & IPV6_ADDR_LINKLOCAL || + (type & IPV6_ADDR_MULTICAST && + (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); +} + +static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) +{ + return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? iface : 0; +} + static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); -- cgit v1.2.3-70-g09d2 From 5d73e0342fd9bf500583868906325d42c4d2bf6f Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 8 Mar 2013 11:20:16 -0300 Subject: Bluetooth: HCI request error handling When we are building a HCI request with more than one HCI command and one of the hci_req_add calls fail, we should have some cleanup routine so the HCI commands already queued on HCI request can be deleted. Otherwise, we will face some memory leaks issues. This patch implements the HCI request error handling which is the following: If a hci_req_add fails, we save the error code in hci_ request. Once hci_req_run is called, we verify the error field. 
If it is different from zero, we delete all HCI commands already queued and return the error code. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 5 +++++ net/bluetooth/hci_core.c | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3a9cbf2b55c..332ee5099a5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1042,6 +1042,11 @@ int hci_unregister_cb(struct hci_cb *hcb); struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; }; void hci_req_init(struct hci_request *req, struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4603464b91e..b432baafdf1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2443,6 +2443,7 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); req->hdev = hdev; + req->err = 0; } int hci_req_run(struct hci_request *req, hci_req_complete_t complete) @@ -2453,6 +2454,14 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + /* If an error occured during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + /* Do not allow empty requests */ if (skb_queue_empty(&req->cmd_q)) return -ENODATA; @@ -2529,7 +2538,9 @@ int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command", hdev->name); + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From e348fe6bbab85c513816d2536ffabac4be016442 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 8 Mar 2013 11:20:17 -0300 Subject: Bluetooth: Make hci_req_add returning void Since no one checks the returning value of hci_req_add and HCI request errors are now handled in hci_req_run, we can make hci_ req_add returning void. 
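As a rough illustration (hypothetical function name, not from the patch), the two changes combine into a simple calling pattern: the return value of each hci_req_add() can be ignored because an allocation failure is latched in req->err, and hci_req_run() purges whatever was already queued and reports that error, so a single check after hci_req_run() covers the whole batch.

static int example_send_batch(struct hci_dev *hdev)
{
        struct hci_request req;
        __u8 scan = 0x03;       /* inquiry + page scan */
        __u8 auth = 0x01;
        int err;

        hci_req_init(&req, hdev);

        /* No error checks here: a failed allocation sets req.err and
         * queues nothing for that command.
         */
        hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
        hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);

        /* If req.err is set, the queued commands are dropped and the
         * error is returned.  Passing NULL as the completion callback is
         * assumed to be acceptable for a fire-and-forget request.
         */
        err = hci_req_run(&req, NULL);
        if (err)
                BT_ERR("%s request failed: %d", hdev->name, err);

        return err;
}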
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 332ee5099a5..d6c32561fc0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1051,7 +1051,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b432baafdf1..1c678757c83 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2529,7 +2529,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2541,15 +2541,13 @@ int hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) BT_ERR("%s no memory for command (opcode 0x%4.4x)", hdev->name, opcode); req->err = -ENOMEM; - return -ENOMEM; + return; } if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->req.start = true; skb_queue_tail(&req->cmd_q, skb); - - return 0; } /* Get data from the previously sent command */ -- cgit v1.2.3-70-g09d2 From ec5f061564238892005257c83565a0b58ec79295 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 09:28:01 +0000 Subject: net: Kill link between CSUM and SG features. Earlier SG was unset if CSUM was not available for given device to force skb copy to avoid sending inconsistent csum. Commit c9af6db4c11c (net: Fix possible wrong checksum generation) added explicit flag to force copy to fix this issue. Therefore there is no need to link SG and CSUM, following patch kills this link between there two features. This patch is also required following patch in series. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 13 ++++++++++ net/core/dev.c | 63 +++++++++++++++++++++++++---------------------- net/core/skbuff.c | 13 ++++++++++ net/ipv4/af_inet.c | 3 --- net/ipv6/ip6_offload.c | 3 --- 5 files changed, 59 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 896eb4985f9..e1ebeffa6b3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2683,6 +2683,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } +__be16 skb_network_protocol(struct sk_buff *skb); + +static inline bool can_checksum_protocol(netdev_features_t features, + __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 96103894ad6..bb999931729 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2208,16 +2208,8 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb) { - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; __be16 type = skb->protocol; while (type == htons(ETH_P_8021Q)) { @@ -2225,13 +2217,31 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return ERR_PTR(-EINVAL); + return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } + return type; +} + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + __be16 type = skb_network_protocol(skb); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + __skb_pull(skb, skb->mac_len); rcu_read_lock(); @@ -2398,24 +2408,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -4921,20 +4919,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - /* Fix illegal SG+CSUM combinations. 
*/ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - netdev_dbg(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); - features &= ~NETIF_F_SG; - } - /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } + if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IP_CSUM)) { + netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO; + features &= ~NETIF_F_TSO_ECN; + } + + if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IPV6_CSUM)) { + netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO6; + } + /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33245ef54c3..0a48ae20c90 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2741,12 +2741,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; + __be16 proto; + bool csum; int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + proto = skb_network_protocol(skb); + if (unlikely(!proto)) + return ERR_PTR(-EINVAL); + + csum = !!can_checksum_protocol(features, proto); __skb_push(skb, doffset); headroom = skb_headroom(skb); pos = skb_headlen(skb); @@ -2884,6 +2891,12 @@ skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; + + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); + nskb->ip_summed = CHECKSUM_NONE; + } } while ((offset += len) < skb->len); return segs; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f766..dc3f677360a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1284,9 +1284,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int id; unsigned int offset = 0; - if (!(features & NETIF_F_V4_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 8234c1dcdf7..7a0d25a5479 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,9 +92,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | -- cgit v1.2.3-70-g09d2 From aefbd2b3c2a9c657605e4337f9919d6c6273e8e6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 13:21:46 +0000 Subject: tunneling: Capture inner mac header during encapsulation. This patch adds inner mac header. This will be used in next patch to find tunner header length. Header len is required to copy tunnel header to each gso segment. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 34 ++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a..d7f96ff68f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -387,6 +387,7 @@ typedef unsigned char *sk_buff_data_t; * @vlan_tci: vlan tag control information * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) + * @inner_mac_header: Link layer header (encapsulation) * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -505,6 +506,7 @@ struct sk_buff { sk_buff_data_t inner_transport_header; sk_buff_data_t inner_network_header; + sk_buff_data_t inner_mac_header; sk_buff_data_t transport_header; sk_buff_data_t network_header; sk_buff_data_t mac_header; @@ -1466,6 +1468,7 @@ static inline void skb_reserve(struct sk_buff *skb, int len) static inline void skb_reset_inner_headers(struct sk_buff *skb) { + skb->inner_mac_header = skb->mac_header; skb->inner_network_header = skb->network_header; skb->inner_transport_header = skb->transport_header; } @@ -1511,6 +1514,22 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->head + skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data - skb->head; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + skb_reset_inner_mac_header(skb); + skb->inner_mac_header += offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != ~0U; @@ -1604,6 +1623,21 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header = skb->data + offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + skb->inner_mac_header = skb->data + offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != NULL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0278c7f787b..31c6737d318 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -673,6 +673,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; + new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -876,6 +877,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; + skb->inner_mac_header += off; } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -- cgit v1.2.3-70-g09d2 From 731362674580cb0c696cd1b1a03d8461a10cf90a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 Mar 2013 13:21:51 +0000 Subject: tunneling: Add generic Tunnel 
segmentation. Adds generic tunneling offloading support for IPv4-UDP based tunnels. GSO type is added to request this offload for a skb. netdev feature NETIF_F_UDP_TUNNEL is added for hardware offloaded udp-tunnel support. Currently no device supports this feature, software offload is used. This can be used by tunneling protocols like VXLAN. CC: Jesse Gross Signed-off-by: Pravin B Shelar Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +-- include/linux/skbuff.h | 2 + net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 6 ++- net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 115 +++++++++++++++++++++++++++++++--------- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 8 ++- 8 files changed, 111 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 3dd39340430..f5e797c0c2a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -42,9 +42,9 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ - /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ - NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ - = NETIF_F_GSO_LAST, + NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ + /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ + NETIF_F_GSO_UDP_TUNNEL_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ @@ -103,6 +103,7 @@ enum { #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) +#define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d7f96ff68f7..eb2106fe3bb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,6 +316,8 @@ enum { SKB_GSO_FCOE = 1 << 5, SKB_GSO_GRE = 1 << 6, + + SKB_GSO_UDP_TUNNEL = 1 << 7, }; #if BITS_PER_LONG > 32 diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3e9b2c3e30f..adc1351e687 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -78,6 +78,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dc3f677360a..9e5882caf8a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1283,6 +1283,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; unsigned int offset = 0; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1290,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0))) goto out; @@ -1304,6 +1306,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + tunnel = !!skb->encapsulation; + __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); @@ -1323,7 +1327,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = ip_hdr(skb); - if (proto == IPPROTO_UDP) { + if 
(!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854fcae2..8d14573ade7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3044,6 +3044,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963..41760e043bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2272,31 +2272,88 @@ void __init udp_init(void) int udp4_ufo_send_check(struct sk_buff *skb) { - const struct iphdr *iph; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) + if (!pskb_may_pull(skb, sizeof(struct udphdr))) return -EINVAL; - iph = ip_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } return 0; } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + int outer_hlen; + netdev_features_t enc_features; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + outer_hlen = skb_tnl_header_len(skb); + skb = segs; + do { + struct udphdr *uh; + int udp_offset = outer_hlen - tnl_hlen; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + uh = udp_hdr(skb); + uh->len = htons(skb->len - udp_offset); + + /* csum segment if tunnel sets skb with csum. 
*/ + if (unlikely(uh->check)) { + struct iphdr *iph = ip_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - udp_offset, + IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, + skb->len - udp_offset, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + } + skb->ip_summed = CHECKSUM_NONE; + } while ((skb = skb->next)); +out: + return segs; +} + struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; - int offset; - __wsum csum; - mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; @@ -2306,6 +2363,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int type = skb_shinfo(skb)->gso_type; if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; @@ -2316,20 +2374,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - segs = skb_segment(skb, features); + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } out: return segs; } - diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 7a0d25a5479..71b766ee821 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index cf05cf073c5..3bb3a891a42 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; + /* UDP Tunnel offload on ipv6 is not yet supported. */ + if (skb->encapsulation) + return -EINVAL; + if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; @@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + if (unlikely(type & ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.2.3-70-g09d2 From 6aed0c8bf7d2f389b472834053eb6e3bd6926999 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 9 Mar 2013 16:38:39 +0000 Subject: tunnel: use iptunnel_xmit() again With recent patches from Pravin, most tunnels can't use iptunnel_xmit() any more, due to ip_select_ident() and skb->ip_summed. 
But we can just move these operations out of iptunnel_xmit(), so that tunnels can use it again. This by the way fixes a bug in vxlan (missing nf_reset()) for net-next. Cc: Pravin B Shelar Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 +------------- include/net/ipip.h | 3 --- net/ipv4/ip_gre.c | 16 +--------------- net/ipv4/ipip.c | 18 +----------------- net/ipv6/sit.c | 2 ++ 5 files changed, 5 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f057ec00bba..f3a135cb50a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -855,7 +855,6 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) __u16 src_port; __be16 df = 0; __u8 tos, ttl; - int err; bool did_rsc = false; const struct vxlan_fdb *f; @@ -980,18 +979,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (handle_offloads(skb)) goto drop; - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats); - - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += pkt_len; - u64_stats_update_end(&stats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; drop: diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99..0c046e3bca0 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -51,13 +51,10 @@ struct ip_tunnel_prl_entry { static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) { int err; - struct iphdr *iph = ip_hdr(skb); int pkt_len = skb->len - skb_transport_offset(skb); struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); nf_reset(skb); - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); err = ip_local_out(skb); if (likely(net_xmit_eval(err) == 0)) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e674ec..a13a0972a57 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -762,7 +762,6 @@ error: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -778,7 +777,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev int mtu; u8 ttl; int err; - int pkt_len; skb = handle_offloads(tunnel, skb); if (IS_ERR(skb)) { @@ -1022,19 +1020,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - nf_reset(skb); - - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index b50435ba0ce..34e006fe2d8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -478,8 +478,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; - int err; - int pkt_len; if (skb->protocol != htons(ETH_P_IP)) goto tx_error; @@ -600,21 +598,7 @@ static netdev_tx_t 
ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if ((iph->ttl = tiph->ttl) == 0) iph->ttl = old_iph->ttl; - nf_reset(skb); - - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 02f96dcbcf0..898e671a526 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -899,6 +899,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; -- cgit v1.2.3-70-g09d2 From e8f72ea4a1380eeca10a551bc8d678e7d4388d42 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 9 Mar 2013 23:00:39 +0000 Subject: ipv6: introduce ip6tunnel_xmit() helper Similar to iptunnel_xmit(), group these operations into a helper function. This by the way fixes the missing u64_stats_update_begin() and u64_stats_update_end() for 32 bit arch. Cc: Eric Dumazet Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 20 ++++++++++++++++++++ net/ipv6/ip6_gre.c | 17 +---------------- net/ipv6/ip6_tunnel.c | 15 +-------------- 3 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index e03047f7090..ebdef7f6086 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -68,4 +68,24 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device_stats *stats = &dev->stats; + int pkt_len, err; + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } +} #endif diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e4efffe2522..6a6ba73ff26 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -667,7 +667,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - int pkt_len; struct sk_buff *new_skb; if (dev->type == ARPHRD_ETHER) @@ -801,23 +800,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } - + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); - return 0; tx_err_link_failure: stats->tx_carrier_errors++; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fff83cbc197..bef3fedfdc5 100644 --- a/net/ipv6/ip6_tunnel.c +++ 
b/net/ipv6/ip6_tunnel.c @@ -955,7 +955,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; - int pkt_len; if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); @@ -1035,19 +1034,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; -- cgit v1.2.3-70-g09d2 From e61667af2f77d481411f2ccd307fed2247d785a8 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 10 Mar 2013 05:18:39 +0000 Subject: tcp: Remove unused tw_cookie_values from tcp_timewait_sock tw_cookie_values is never used in the TCP-stack. It was added by 435cf559f (TCPCT part 1d: define TCP cookie option, extend existing struct's), but already at that time it was not used at all, nor mentioned in the commit-message. Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f28408c07dc..515c3746b67 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -361,10 +361,6 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif - /* Few sockets in timewait have cookies; in that case, then this - * object holds a reference to them (tw_cookie_values->kref). - */ - struct tcp_cookie_values *tw_cookie_values; }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) -- cgit v1.2.3-70-g09d2 From 26fd76cab2e61cedc5c25f7151fb31b57ddc53c7 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 22 Feb 2013 10:53:25 +0100 Subject: NFC: llcp: Implement socket options Some LLCP services (e.g. the validation ones) require some control over the LLCP link parameters like the receive window (RW) or the MIU extension (MIUX). This can only be done through socket options. 
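A short userspace sketch of how these options would be exercised (illustrative only, not part of the patch): AF_NFC, NFC_SOCKPROTO_LLCP and the fallback constant values are assumptions about the existing NFC socket family rather than something shown in this excerpt. The kernel side reads a full u32 for both options and rejects them with EINVAL once the socket is bound, listening or connected, so they have to be set before bind() or connect().

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/nfc.h>

#ifndef AF_NFC
#define AF_NFC 39               /* assumed value from linux/socket.h */
#endif
#ifndef SOL_NFC
#define SOL_NFC 280             /* value added to linux/socket.h by this patch */
#endif

int main(void)
{
        uint32_t rw = 4;        /* receive window; must not exceed LLCP_MAX_RW */
        uint32_t miux = 128;    /* MIU extension; must not exceed LLCP_MAX_MIUX */
        int fd;

        fd = socket(AF_NFC, SOCK_STREAM, NFC_SOCKPROTO_LLCP);
        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* Both options take a 32-bit value even though RW is stored as
         * u8 and MIUX as u16 on the kernel side.
         */
        if (setsockopt(fd, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw)) < 0)
                perror("setsockopt(NFC_LLCP_RW)");
        if (setsockopt(fd, SOL_NFC, NFC_LLCP_MIUX, &miux, sizeof(miux)) < 0)
                perror("setsockopt(NFC_LLCP_MIUX)");

        return 0;
}

The matching getsockopt() calls added by the same patch return the stored values in the same u32 form.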
Signed-off-by: Samuel Ortiz --- include/linux/socket.h | 1 + include/uapi/linux/nfc.h | 4 ++ net/nfc/llcp/llcp.h | 3 ++ net/nfc/llcp/sock.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 125 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 2b9f74b0ffe..428c37a1f95 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -298,6 +298,7 @@ struct ucred { #define SOL_IUCV 277 #define SOL_CAIF 278 #define SOL_ALG 279 +#define SOL_NFC 280 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 7969f46f1bb..855630fe731 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -220,4 +220,8 @@ struct sockaddr_nfc_llcp { #define NFC_LLCP_DIRECTION_RX 0x00 #define NFC_LLCP_DIRECTION_TX 0x01 +/* socket option names */ +#define NFC_LLCP_RW 0 +#define NFC_LLCP_MIUX 1 + #endif /*__LINUX_NFC_H */ diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 32cec81939e..5f117adac2e 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -104,6 +104,9 @@ struct nfc_llcp_sock { u8 dsap; char *service_name; size_t service_name_len; + u8 rw; + u16 miux; + /* Remote link parameters */ u8 remote_rw; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index cc564992ba9..9357a756f7a 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -223,6 +223,121 @@ error: return ret; } +static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + u32 opt; + int err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_RW) { + err = -EINVAL; + break; + } + + llcp_sock->rw = (u8) opt; + + break; + + case NFC_LLCP_MIUX: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_MIUX) { + err = -EINVAL; + break; + } + + llcp_sock->miux = (u16) opt; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + return err; +} + +static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + int len, err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(u32, len, sizeof(u32)); + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (put_user(llcp_sock->rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_MIUX: + if (put_user(llcp_sock->miux, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + if (put_user(len, optlen)) + return -EFAULT; + + return err; +} + void nfc_llcp_accept_unlink(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -735,8 +850,8 @@ static const struct proto_ops 
llcp_sock_ops = { .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = nfc_llcp_setsockopt, + .getsockopt = nfc_llcp_getsockopt, .sendmsg = llcp_sock_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, -- cgit v1.2.3-70-g09d2 From d9b8d8e19b073096d3609bbd60f82148d128b555 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 15 Feb 2013 10:43:06 +0100 Subject: NFC: llcp: Service Name Lookup netlink interface This adds a netlink interface for service name lookup support. Multiple URIs can be passed nested into the NFC_ATTR_LLC_SDP attribute using the NFC_CMD_LLC_SDREQ netlink command. When the SNL reply is received, a NFC_EVENT_LLC_SDRES event is sent to the user space. URI and SAP tuples are passed back, nested into NFC_ATTR_LLC_SDP attribute. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 12 ++++ net/nfc/llcp/commands.c | 78 ++++++++++++++++++++++ net/nfc/llcp/llcp.c | 32 +++++++++ net/nfc/llcp/llcp.h | 9 +++ net/nfc/netlink.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 14 ++++ 6 files changed, 314 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 855630fe731..7440bc81a04 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -90,6 +90,8 @@ enum nfc_commands { NFC_CMD_LLC_SET_PARAMS, NFC_CMD_ENABLE_SE, NFC_CMD_DISABLE_SE, + NFC_CMD_LLC_SDREQ, + NFC_EVENT_LLC_SDRES, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -140,11 +142,21 @@ enum nfc_attrs { NFC_ATTR_LLC_PARAM_RW, NFC_ATTR_LLC_PARAM_MIUX, NFC_ATTR_SE, + NFC_ATTR_LLC_SDP, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; #define NFC_ATTR_MAX (__NFC_ATTR_AFTER_LAST - 1) +enum nfc_sdp_attr { + NFC_SDP_ATTR_UNSPEC, + NFC_SDP_ATTR_URI, + NFC_SDP_ATTR_SAP, +/* private: internal use only */ + __NFC_SDP_ATTR_AFTER_LAST +}; +#define NFC_SDP_ATTR_MAX (__NFC_SDP_ATTR_AFTER_LAST - 1) + #define NFC_DEVICE_NAME_MAXSIZE 8 #define NFC_NFCID1_MAXSIZE 10 #define NFC_SENSB_RES_MAXSIZE 12 diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index 59f7ffca783..c943edb07b7 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -144,12 +144,59 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) return sdres; } +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + + pr_debug("uri: %s, len: %zu\n", uri, uri_len); + + sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); + if (sdreq == NULL) + return NULL; + + sdreq->tlv_len = uri_len + 3; + + if (uri[uri_len - 1] == 0) + sdreq->tlv_len--; + + sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL); + if (sdreq->tlv == NULL) { + kfree(sdreq); + return NULL; + } + + sdreq->tlv[0] = LLCP_TLV_SDREQ; + sdreq->tlv[1] = sdreq->tlv_len - 2; + sdreq->tlv[2] = tid; + + sdreq->tid = tid; + sdreq->uri = sdreq->tlv + 3; + memcpy(sdreq->uri, uri, uri_len); + + INIT_HLIST_NODE(&sdreq->node); + + return sdreq; +} + void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) { kfree(sdp->tlv); kfree(sdp); } +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) +{ + struct nfc_llcp_sdp_tlv *sdp; + struct hlist_node *n; + + hlist_for_each_entry_safe(sdp, n, head, node) { + hlist_del(&sdp->node); + + nfc_llcp_free_sdp_tlv(sdp); + } +} + int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, u8 *tlv_array, u16 
tlv_array_len) { @@ -511,6 +558,37 @@ int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, return 0; } +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + struct hlist_node *n; + struct sk_buff *skb; + + skb = nfc_llcp_allocate_snl(local, tlvs_len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + mutex_lock(&local->sdreq_lock); + + hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { + pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); + + memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv, + sdreq->tlv_len); + + hlist_del(&sdreq->node); + + hlist_add_head(&sdreq->node, &local->pending_sdreqs); + } + + mutex_unlock(&local->sdreq_lock); + + skb_queue_tail(&local->tx_queue, skb); + + return 0; +} + int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) { struct sk_buff *skb; diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 30b61c1aa98..99e91106042 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -156,6 +156,7 @@ static void local_release(struct kref *ref) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); kfree(local); } @@ -1147,6 +1148,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); size_t sdres_tlvs_len; + HLIST_HEAD(nl_sdres_list); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); @@ -1229,6 +1231,30 @@ add_snl: hlist_add_head(&sdp->node, &llc_sdres_list); break; + case LLCP_TLV_SDRES: + mutex_lock(&local->sdreq_lock); + + pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]); + + hlist_for_each_entry(sdp, &local->pending_sdreqs, node) { + if (sdp->tid != tlv[2]) + continue; + + sdp->sap = tlv[3]; + + pr_debug("Found: uri=%s, sap=%d\n", + sdp->uri, sdp->sap); + + hlist_del(&sdp->node); + + hlist_add_head(&sdp->node, &nl_sdres_list); + + break; + } + + mutex_unlock(&local->sdreq_lock); + break; + default: pr_err("Invalid SNL tlv value 0x%x\n", type); break; @@ -1239,6 +1265,9 @@ add_snl: } exit: + if (!hlist_empty(&nl_sdres_list)) + nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); + if (!hlist_empty(&llc_sdres_list)) nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len); } @@ -1426,6 +1455,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->remote_miu = LLCP_DEFAULT_MIU; local->remote_lto = LLCP_DEFAULT_LTO; + mutex_init(&local->sdreq_lock); + INIT_HLIST_HEAD(&local->pending_sdreqs); + list_add(&local->list, &llcp_devices); return 0; diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 465f5953e2d..ca8c6d94ab8 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -97,6 +97,10 @@ struct nfc_llcp_local { u8 remote_opt; u16 remote_wks; + struct mutex sdreq_lock; + struct hlist_head pending_sdreqs; + u8 sdreq_next_tid; + /* sockets array */ struct llcp_sock_list sockets; struct llcp_sock_list connecting_sockets; @@ -230,7 +234,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void 
nfc_llcp_recv(void *data, struct sk_buff *skb, int err); int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); @@ -238,6 +245,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, struct hlist_head *tlv_list, size_t tlvs_len); +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len); int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason); int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 63975c039b8..73fd51098f4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -56,6 +56,12 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, + [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { + [NFC_SDP_ATTR_URI] = { .type = NLA_STRING }, + [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -351,6 +357,74 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) +{ + struct sk_buff *msg; + struct nlattr *sdp_attr, *uri_attr; + struct nfc_llcp_sdp_tlv *sdres; + struct hlist_node *n; + void *hdr; + int rc = -EMSGSIZE; + int i; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_LLC_SDRES); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); + if (sdp_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + i = 1; + hlist_for_each_entry_safe(sdres, n, sdres_list, node) { + pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); + + uri_attr = nla_nest_start(msg, i++); + if (uri_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) + goto nla_put_failure; + + if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) + goto nla_put_failure; + + nla_nest_end(msg, uri_attr); + + hlist_del(&sdres->node); + + nfc_llcp_free_sdp_tlv(sdres); + } + + nla_nest_end(msg, sdp_attr); + + genlmsg_end(msg, hdr); + + return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + +nla_put_failure: + genlmsg_cancel(msg, hdr); + +free_msg: + nlmsg_free(msg); + + nfc_llcp_free_sdp_tlv_list(sdres_list); + + return rc; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -862,6 +936,96 @@ exit: return rc; } +static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; + u32 idx; + u8 tid; + char *uri; + int rc = 0, rem; + size_t uri_len, tlvs_len; + struct hlist_head sdreq_list; + struct nfc_llcp_sdp_tlv *sdreq; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_LLC_SDP]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) { + rc = -ENODEV; + 
goto exit; + } + + device_lock(&dev->dev); + + if (dev->dep_link_up == false) { + rc = -ENOLINK; + goto exit; + } + + local = nfc_llcp_find_local(dev); + if (!local) { + nfc_put_device(dev); + rc = -ENODEV; + goto exit; + } + + INIT_HLIST_HEAD(&sdreq_list); + + tlvs_len = 0; + + nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { + rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, + nfc_sdp_genl_policy); + + if (rc != 0) { + rc = -EINVAL; + goto exit; + } + + if (!sdp_attrs[NFC_SDP_ATTR_URI]) + continue; + + uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri_len == 0) + continue; + + uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri == NULL || *uri == 0) + continue; + + tid = local->sdreq_next_tid++; + + sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); + if (sdreq == NULL) { + rc = -ENOMEM; + goto exit; + } + + tlvs_len += sdreq->tlv_len; + + hlist_add_head(&sdreq->node, &sdreq_list); + } + + if (hlist_empty(&sdreq_list)) { + rc = -EINVAL; + goto exit; + } + + rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -916,6 +1080,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_set_params, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_LLC_SDREQ, + .doit = nfc_genl_llc_sdreq, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 87d914d2876..94bfe19ba67 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -46,6 +46,8 @@ struct nfc_rawsock { #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) +struct nfc_llcp_sdp_tlv; + #ifdef CONFIG_NFC_LLCP void nfc_llcp_mac_is_down(struct nfc_dev *dev); @@ -59,6 +61,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head); #else @@ -112,6 +116,14 @@ static inline void nfc_llcp_exit(void) { } +static inline void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) +{ +} + +static inline void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head) +{ +} + #endif int __init rawsock_init(void); @@ -144,6 +156,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev); int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); int nfc_genl_tm_deactivated(struct nfc_dev *dev); +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit v1.2.3-70-g09d2 From 488b366a452934141959384c7a1b52b22d6154ef Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 11 Feb 2013 14:56:29 +0200 Subject: mac80211: add driver callback for per-interface multicast filter Some devices have multicast filter capability for each individual virtual interface rather than just a global one. Add an interface specific driver callback allowing such drivers to configure this. 
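
To make the new hook concrete, a hedged sketch follows (not taken from any
in-tree driver) of how a driver with per-interface multicast filtering might
implement and register the callback. The mydrv_* names, the filter-table layout
and MYDRV_MAX_MCAST_FILTERS are placeholders; only the callback signature and the
netdev_hw_addr_list helpers are real API. Since the callback is atomic, the
sketch only records the addresses and leaves programming the hardware to a later,
sleepable context.

/* Hedged illustration; mydrv_* and MYDRV_MAX_MCAST_FILTERS are made up. */
#include <net/mac80211.h>

#define MYDRV_MAX_MCAST_FILTERS 8	/* assumed hardware limit */

struct mydrv_vif {
	bool pass_all_mcast;
	int n_mcast_filters;
	u8 mcast_filter[MYDRV_MAX_MCAST_FILTERS][ETH_ALEN];
};

static void mydrv_set_multicast_list(struct ieee80211_hw *hw,
				     struct ieee80211_vif *vif, bool allmulti,
				     struct netdev_hw_addr_list *mc_list)
{
	struct mydrv_vif *mvif = (void *)vif->drv_priv;
	struct netdev_hw_addr *ha;
	int i = 0;

	/* Fall back to passing all multicast if asked to, or if the list
	 * does not fit in the per-interface filter table. */
	mvif->pass_all_mcast = allmulti ||
			       netdev_hw_addr_list_count(mc_list) >
			       MYDRV_MAX_MCAST_FILTERS;
	if (mvif->pass_all_mcast) {
		mvif->n_mcast_filters = 0;
		return;
	}

	/* Record the per-interface filter; this callback must be atomic,
	 * so pushing it to the device is deferred (e.g. to a workqueue). */
	netdev_hw_addr_list_for_each(ha, mc_list)
		memcpy(mvif->mcast_filter[i++], ha->addr, ETH_ALEN);
	mvif->n_mcast_filters = i;
}

static const struct ieee80211_ops mydrv_ops = {
	/* mandatory callbacks (tx, start, stop, config, ...) omitted */
	.set_multicast_list = mydrv_set_multicast_list,
};
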
Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/driver-ops.h | 16 ++++++++++++++++ net/mac80211/iface.c | 11 +++++++++++ net/mac80211/trace.h | 24 ++++++++++++++++++++++++ 4 files changed, 58 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8c0ca11a39c..f5db5e97042 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2259,6 +2259,9 @@ enum ieee80211_roc_type { * See the section "Frame filtering" for more information. * This callback must be implemented and can sleep. * + * @set_multicast_list: Configure the device's interface specific RX multicast + * filter. This callback is optional. This callback must be atomic. + * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit * must be set or cleared for a given STA. Must be atomic. * @@ -2605,6 +2608,10 @@ struct ieee80211_ops { unsigned int changed_flags, unsigned int *total_flags, u64 multicast); + void (*set_multicast_list)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, bool allmulti, + struct netdev_hw_addr_list *mc_list); + int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 832acea4a5c..025b7592b79 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } +static inline void drv_set_multicast_list(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct netdev_hw_addr_list *mc_list) +{ + bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + + trace_drv_set_multicast_list(local, sdata, mc_list->count); + + check_sdata_in_driver(sdata); + + if (local->ops->set_multicast_list) + local->ops->set_multicast_list(&local->hw, &sdata->vif, + allmulti, mc_list); + trace_drv_return_void(local); +} + static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d85282f6440..9875e321c9e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -919,6 +919,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } + + /* + * TODO: If somebody needs this on AP interfaces, + * it can be enabled easily but multicast + * addresses from VLANs need to be synced. 
+ */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + drv_set_multicast_list(local, sdata, &dev->mc); + spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e7db2b804e0..d97e4305cf1 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -431,6 +431,30 @@ TRACE_EVENT(drv_prepare_multicast, ) ); +TRACE_EVENT(drv_set_multicast_list, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, int mc_count), + + TP_ARGS(local, sdata, mc_count), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, allmulti) + __field(int, mc_count) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + __entry->mc_count = mc_count; + ), + + TP_printk( + LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", + LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti + ) +); + TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, -- cgit v1.2.3-70-g09d2 From b818d1a7f72575eef17e00dc4085512c9cc8897d Mon Sep 17 00:00:00 2001 From: Hector Palacios Date: Sun, 10 Mar 2013 22:50:02 +0000 Subject: phy/micrel: Add support for KSZ8031 Micrel PHY KSZ8031 is similar to KSZ8021 and also requires the special initialization of "Operation Mode Strap Override" in reg 0x16 introduced in 212ea99 (phy/micrel: Implement support for KSZ8021). Signed-off-by: Hector Palacios Reviewed-by: Marek Vasut Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 14 ++++++++++++++ include/linux/micrel_phy.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index abf7b6153d0..018af1852fe 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -191,6 +191,19 @@ static struct phy_driver ksphy_driver[] = { .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8031, + .phy_id_mask = 0x00ffffff, + .name = "Micrel KSZ8031", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = ksz8021_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, @@ -325,6 +338,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, { PHY_ID_KSZ8021, 0x00ffffff }, + { PHY_ID_KSZ8031, 0x00ffffff }, { PHY_ID_KSZ8041, 0x00fffff0 }, { PHY_ID_KSZ8051, 0x00fffff0 }, { PHY_ID_KSZ8061, 0x00fffff0 }, diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9dbb41a4e25..8752dbbc613 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -19,6 +19,7 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 #define PHY_ID_KSZ8021 0x00221555 +#define PHY_ID_KSZ8031 0x00221556 #define PHY_ID_KSZ8041 0x00221510 #define PHY_ID_KSZ8051 0x00221550 /* same id: ks8001 Rev. A/B, and ks8721 Rev 3. 
*/ -- cgit v1.2.3-70-g09d2 From 6ba8a3b19e764b6a65e4030ab0999be50c291e6c Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 11 Mar 2013 10:00:43 +0000 Subject: tcp: Tail loss probe (TLP) This patch series implement the Tail loss probe (TLP) algorithm described in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The first patch implements the basic algorithm. TLP's goal is to reduce tail latency of short transactions. It achieves this by converting retransmission timeouts (RTOs) occuring due to tail losses (losses at end of transactions) into fast recovery. TLP transmits one packet in two round-trips when a connection is in Open state and isn't receiving any ACKs. The transmitted packet, aka loss probe, can be either new or a retransmission. When there is tail loss, the ACK from a loss probe triggers FACK/early-retransmit based fast recovery, thus avoiding a costly RTO. In the absence of loss, there is no change in the connection state. PTO stands for probe timeout. It is a timer event indicating that an ACK is overdue and triggers a loss probe packet. The PTO value is set to max(2*SRTT, 10ms) and is adjusted to account for delayed ACK timer when there is only one oustanding packet. TLP Algorithm On transmission of new data in Open state: -> packets_out > 1: schedule PTO in max(2*SRTT, 10ms). -> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms) -> PTO = min(PTO, RTO) Conditions for scheduling PTO: -> Connection is in Open state. -> Connection is either cwnd limited or no new data to send. -> Number of probes per tail loss episode is limited to one. -> Connection is SACK enabled. When PTO fires: new_segment_exists: -> transmit new segment. -> packets_out++. cwnd remains same. no_new_packet: -> retransmit the last segment. Its ACK triggers FACK or early retransmit based recovery. ACK path: -> rearm RTO at start of ACK processing. -> reschedule PTO if need be. In addition, the patch includes a small variation to the Early Retransmit (ER) algorithm, such that ER and TLP together can in principle recover any N-degree of tail loss through fast recovery. TLP is controlled by the same sysctl as ER, tcp_early_retrans sysctl. tcp_early_retrans==0; disables TLP and ER. ==1; enables RFC5827 ER. ==2; delayed ER. ==3; TLP and delayed ER. [DEFAULT] ==4; TLP only. The TLP patch series have been extensively tested on Google Web servers. It is most effective for short Web trasactions, where it reduced RTOs by 15% and improved HTTP response time (average by 6%, 99th percentile by 10%). The transmitted probes account for <0.5% of the overall transmissions. Signed-off-by: Nandita Dukkipati Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++- include/linux/tcp.h | 1 - include/net/inet_connection_sock.h | 5 +- include/net/tcp.h | 6 +- include/uapi/linux/snmp.h | 1 + net/ipv4/inet_diag.c | 4 +- net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp_input.c | 24 ++++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_output.c | 128 +++++++++++++++++++++++++++++++-- net/ipv4/tcp_timer.c | 13 ++-- 12 files changed, 171 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index dc2dc87d255..1cae6c383e1 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -190,7 +190,9 @@ tcp_early_retrans - INTEGER Enable Early Retransmit (ER), per RFC 5827. 
ER lowers the threshold for triggering fast retransmit when the amount of outstanding data is small and when no previously unsent data can be transmitted (such - that limited transmit could be used). + that limited transmit could be used). Also controls the use of + Tail loss probe (TLP) that converts RTOs occuring due to tail + losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01). Possible values: 0 disables ER 1 enables ER @@ -198,7 +200,9 @@ tcp_early_retrans - INTEGER by a fourth of RTT. This mitigates connection falsely recovers when network has a small degree of reordering (less than 3 packets). - Default: 2 + 3 enables delayed ER and TLP. + 4 enables TLP only. + Default: 3 tcp_ecn - INTEGER Control use of Explicit Congestion Notification (ECN) by TCP. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 515c3746b67..01860d74555 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,7 +201,6 @@ struct tcp_sock { unused : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1, /* Delayed ER timer installed */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 183292722f6..de2c78529af 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -133,6 +133,8 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ +#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -222,7 +224,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, when = max_when; } - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/tcp.h b/include/net/tcp.h index a2baa5e4ba3..ab9f947b118 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -543,6 +543,8 @@ extern bool tcp_syn_flood_action(struct sock *sk, extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +extern void tcp_send_loss_probe(struct sock *sk); +extern bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); @@ -873,8 +875,8 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { tp->do_early_retrans = sysctl_tcp_early_retrans && - !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; - tp->early_retrans_delayed = 0; + sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && + sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b49eab89c9f..290bed6b085 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -202,6 +202,7 @@ enum LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ 
LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ + LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788..8620408af57 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a24e77..4c35911d935 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -224,6 +224,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29d9b8..cca4550f408 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@ static int zero; static int one = 1; -static int two = 2; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -760,7 +760,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99..b794f89ac1f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -98,7 +98,7 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -2150,15 +2150,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. 
*/ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2321,7 +2322,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. */ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -3081,6 +3082,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3094,12 +3096,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3107,7 +3110,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. TCP enters @@ -3601,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) @@ -3678,6 +3681,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8cdee120a50..b7ab868c828 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2703,7 +2703,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074d..beb63dbc85f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -74,6 +74,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* Account for new data that has been sent to the network. 
*/ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -85,7 +86,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->frto_counter = 3; tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); } @@ -1959,6 +1961,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. + * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1994,8 +1999,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2049,10 +2059,120 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. */ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. 
*/ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* Retransmit last segment. */ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c49..ecd61d54147 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -495,13 +491,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } -- cgit v1.2.3-70-g09d2 From 9b717a8d245075ffb8e95a2dfb4ee97ce4747457 Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 11 Mar 2013 10:00:44 +0000 Subject: tcp: TLP loss detection. This is the second of the TLP patch series; it augments the basic TLP algorithm with a loss detection scheme. This patch implements a mechanism for loss detection when a Tail loss probe retransmission plugs a hole thereby masking packet loss from the sender. The loss detection algorithm relies on counting TLP dupacks as outlined in Sec. 3 of: http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 The basic idea is: Sender keeps track of TLP "episode" upon retransmission of a TLP packet. An episode ends when the sender receives an ACK above the SND.NXT (tracked by tlp_high_seq) at the time of the episode. We want to make sure that before the episode ends the sender receives a "TLP dupack", indicating that the TLP retransmission was unnecessary, so there was no loss/hole that needed plugging. 
If the sender gets no TLP dupack before the end of the episode, then it reduces ssthresh and the congestion window, because the TLP packet arriving at the receiver probably plugged a hole. Signed-off-by: Nandita Dukkipati Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 39 +++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 9 +++++++++ net/ipv4/tcp_timer.c | 2 ++ 7 files changed, 54 insertions(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 01860d74555..763c108ee03 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -204,6 +204,7 @@ struct tcp_sock { syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 290bed6b085..e00013a1deb 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -203,6 +203,7 @@ enum LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ + LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4c35911d935..b6f2ea17489 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -225,6 +225,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), + SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b794f89ac1f..836d74dd018 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2682,6 +2682,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -3569,6 +3570,38 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); + + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. + */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; + } + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. 
*/ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3676,6 +3709,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) @@ -3697,6 +3733,9 @@ no_queue: */ if (tcp_send_head(sk)) tcp_ack_probe(sk); + + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); return 1; invalid_ack: diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b83a49cc381..4bdb09fca40 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -440,6 +440,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); + newtp->tlp_high_seq = 0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index beb63dbc85f..8e7742f0b5d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2132,6 +2132,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) */ void tcp_send_loss_probe(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int pcount; int mss = tcp_current_mss(sk); @@ -2142,6 +2143,10 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; } + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + /* Retransmit last segment. */ skb = tcp_write_queue_tail(sk); if (WARN_ON(!skb)) @@ -2164,6 +2169,10 @@ void tcp_send_loss_probe(struct sock *sk) if (skb->len > 0) err = __tcp_retransmit_skb(sk, skb); + /* Record snd_nxt for loss detection. */ + if (likely(!err)) + tp->tlp_high_seq = tp->snd_nxt; + rearm_timer: inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ecd61d54147..eeccf795e91 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -356,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk) WARN_ON(tcp_write_queue_empty(sk)); + tp->tlp_high_seq = 0; + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits -- cgit v1.2.3-70-g09d2 From 42e836eb4527fb635cb799a701fe4c9fe741c03a Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Mon, 11 Mar 2013 13:56:44 +0000 Subject: phy: add set_wol/get_wol functions This allows ethernet drivers (such as the mv643xx_eth) to support Wake on LAN on platforms where PHY registers have to be configured for Wake on LAN (e.g. the Marvell Kirkwood based qnap TS-119P II). Signed-off-by: Michael Stapelberg Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 16 ++++++++++++++++ include/linux/phy.h | 10 ++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef9ea924822..298b4c20173 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1188,3 +1188,19 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) return 0; } EXPORT_SYMBOL(phy_ethtool_set_eee); + +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->set_wol) + return phydev->drv->set_wol(phydev, wol); + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(phy_ethtool_set_wol); + +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->get_wol) + phydev->drv->get_wol(phydev, wol); +} +EXPORT_SYMBOL(phy_ethtool_get_wol); diff --git a/include/linux/phy.h b/include/linux/phy.h index 33999adbf8c..9e11039dd7a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -455,6 +455,14 @@ struct phy_driver { */ void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); + /* Some devices (e.g. qnap TS-119P II) require PHY register changes to + * enable Wake on LAN, so set_wol is provided to be called in the + * ethernet driver's set_wol function. */ + int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + + /* See set_wol, but for checking whether Wake on LAN is enabled. */ + void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) @@ -560,6 +568,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int __init mdio_bus_init(void); void mdio_bus_exit(void); -- cgit v1.2.3-70-g09d2 From e86ac13b031cf71d8f40ff513e627aac80e6b765 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 11 Mar 2013 23:16:35 +0000 Subject: drivers: net: ethernet: cpsw: change cpts_active_slave to active_slave Change cpts_active_slave to active_slave so that the same DT property can be used to ethtool and SIOCGMIIPHY. CC: Richard Cochran Signed-off-by: Mugunthan V N Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 7 ++++--- arch/arm/boot/dts/am33xx.dtsi | 2 +- drivers/net/ethernet/ti/cpsw.c | 10 +++++----- include/linux/platform_data/cpsw.h | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 8e49c420092..4f2ca6b4a18 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -15,7 +15,8 @@ Required properties: - mac_control : Specifies Default MAC control register content for the specific platform - slaves : Specifies number for slaves -- cpts_active_slave : Specifies the slave to use for time stamping +- active_slave : Specifies the slave to use for time stamping, + ethtool and SIOCGMIIPHY - cpts_clock_mult : Numerator to convert input clock ticks into nanoseconds - cpts_clock_shift : Denominator to convert input clock ticks into nanoseconds @@ -52,7 +53,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { @@ -78,7 +79,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 0957645b73a..91fe4f148f8 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -349,7 +349,7 @@ rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; reg = <0x4a100000 0x800 diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 01ffbc48698..98aa17a9516 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -942,7 +942,7 @@ static void cpsw_ndo_change_rx_flags(struct net_device *ndev, int flags) static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) { - struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave]; u32 ts_en, seq_id; if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { @@ -971,7 +971,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) if (priv->data.dual_emac) slave = &priv->slaves[priv->emac_port]; else - slave = &priv->slaves[priv->data.cpts_active_slave]; + slave = &priv->slaves[priv->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); ctrl &= ~CTRL_ALL_TS_MASK; @@ -1282,12 +1282,12 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->slaves = prop; - if (of_property_read_u32(node, "cpts_active_slave", &prop)) { - pr_err("Missing cpts_active_slave property in the DT.\n"); + if (of_property_read_u32(node, "active_slave", &prop)) { + pr_err("Missing active_slave property in the DT.\n"); ret = -EINVAL; goto error_ret; } - data->cpts_active_slave = prop; + data->active_slave = prop; if (of_property_read_u32(node, "cpts_clock_mult", &prop)) { pr_err("Missing cpts_clock_mult property in the DT.\n"); diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index 798fb80b024..bb3cd58d71e 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -30,7 +30,7 @@ struct cpsw_platform_data { u32 channels; /* number of cpdma channels (symmetric) */ u32 
slaves; /* number of slave cpgmac ports */ struct cpsw_slave_data *slave_data; - u32 cpts_active_slave; /* time stamping slave */ + u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */ u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */ u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */ u32 ale_entries; /* ale table size */ -- cgit v1.2.3-70-g09d2 From 8a7fbfab4be39b8690543f3d29b26860d2f6c576 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Tue, 12 Mar 2013 02:49:01 +0000 Subject: netxen: write IP address to firmware when using bonding This patch allows LRO aggregation on bonded devices that contain an NX3031 device. It also adds a for_each_netdev_in_bond_rcu(bond, slave) macro which executes for each slave that has bond as master. V3: After testing and discussing this with Rajesh, I decided to keep the vlan ip cache and just rename it to ip_cache since it will store bond ip addresses too. A new master flag has been added to the ip cache to denote that the address has been added because of a master device. I've taken care of the enslave/release cases by checking for various combinations of events and flags (e.g. netxen has a master, it's a bond master and it's not marked as a slave means it is being enslaved and is dev_open()ed in bond_enslave). I've changed netxen_free_ip_list() to have a "master" parameter which causes all IP addresses marked as master to be deleted (used when a netxen is being released). I've made the patch use the new upper device API as well. The following cases were tested: - bond -> netxen - vlan -> netxen - vlan -> bond -> netxen V2: Remove local ip caching, retrieve addresses dynamically and restore them if necessary. Note: Tested with NX3031 adapter. Tested-by: Rajesh Borundia Signed-off-by: Andy Gospodarek Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic.h | 5 +- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 220 ++++++++++++++------- include/linux/netdevice.h | 8 + 3 files changed, 155 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index eb3dfdbb642..322a36b7672 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -955,9 +955,10 @@ typedef struct nx_mac_list_s { uint8_t mac_addr[ETH_ALEN+2]; } nx_mac_list_t; -struct nx_vlan_ip_list { +struct nx_ip_list { struct list_head list; __be32 ip_addr; + bool master; }; /* @@ -1605,7 +1606,7 @@ struct netxen_adapter { struct net_device *netdev; struct pci_dev *pdev; struct list_head mac_list; - struct list_head vlan_ip_list; + struct list_head ip_list; spinlock_t tx_clean_lock; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 501f49207da..7867aebc05f 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -90,7 +90,7 @@ static irqreturn_t netxen_intr(int irq, void *data); static irqreturn_t netxen_msi_intr(int irq, void *data); static irqreturn_t netxen_msix_intr(int irq, void *data); -static void netxen_free_vlan_ip_list(struct netxen_adapter *); +static void netxen_free_ip_list(struct netxen_adapter *, bool); static void netxen_restore_indev_addr(struct net_device *dev, unsigned long); static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); @@ -1450,7 +1450,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->tx_clean_lock); INIT_LIST_HEAD(&adapter->mac_list); - INIT_LIST_HEAD(&adapter->vlan_ip_list); + INIT_LIST_HEAD(&adapter->ip_list); err = netxen_setup_pci_map(adapter); if (err) @@ -1585,7 +1585,7 @@ static void netxen_nic_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->tx_timeout_task); - netxen_free_vlan_ip_list(adapter); + netxen_free_ip_list(adapter, false); netxen_nic_detach(adapter); nx_decr_dev_ref_cnt(adapter); @@ -3137,62 +3137,77 @@ netxen_destip_supported(struct netxen_adapter *adapter) } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { - struct nx_vlan_ip_list *cur; - struct list_head *head = &adapter->vlan_ip_list; + struct nx_ip_list *cur, *tmp_cur; - while (!list_empty(head)) { - cur = list_entry(head->next, struct nx_vlan_ip_list, list); - netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); - list_del(&cur->list); - kfree(cur); + list_for_each_entry_safe(cur, tmp_cur, &adapter->ip_list, list) { + if (master) { + if (cur->master) { + netxen_config_ipaddr(adapter, cur->ip_addr, + NX_IP_DOWN); + list_del(&cur->list); + kfree(cur); + } + } else { + netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); + list_del(&cur->list); + kfree(cur); + } } - } -static void -netxen_list_config_vlan_ip(struct netxen_adapter *adapter, + +static bool +netxen_list_config_ip(struct netxen_adapter *adapter, struct in_ifaddr *ifa, unsigned long event) { struct net_device *dev; - struct nx_vlan_ip_list *cur, *tmp_cur; + struct nx_ip_list *cur, *tmp_cur; struct list_head *head; + bool ret = false; dev = ifa->ifa_dev ? 
ifa->ifa_dev->dev : NULL; if (dev == NULL) - return; - - if (!is_vlan_dev(dev)) - return; + goto out; switch (event) { case NX_IP_UP: - list_for_each(head, &adapter->vlan_ip_list) { - cur = list_entry(head, struct nx_vlan_ip_list, list); + list_for_each(head, &adapter->ip_list) { + cur = list_entry(head, struct nx_ip_list, list); if (cur->ip_addr == ifa->ifa_address) - return; + goto out; } - cur = kzalloc(sizeof(struct nx_vlan_ip_list), GFP_ATOMIC); + cur = kzalloc(sizeof(struct nx_ip_list), GFP_ATOMIC); if (cur == NULL) - return; - + goto out; + if (dev->priv_flags & IFF_802_1Q_VLAN) + dev = vlan_dev_real_dev(dev); + cur->master = !!netif_is_bond_master(dev); cur->ip_addr = ifa->ifa_address; - list_add_tail(&cur->list, &adapter->vlan_ip_list); + list_add_tail(&cur->list, &adapter->ip_list); + netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); + ret = true; break; case NX_IP_DOWN: list_for_each_entry_safe(cur, tmp_cur, - &adapter->vlan_ip_list, list) { + &adapter->ip_list, list) { if (cur->ip_addr == ifa->ifa_address) { list_del(&cur->list); kfree(cur); + netxen_config_ipaddr(adapter, ifa->ifa_address, + NX_IP_DOWN); + ret = true; break; } } } +out: + return ret; } + static void netxen_config_indev_addr(struct netxen_adapter *adapter, struct net_device *dev, unsigned long event) @@ -3209,14 +3224,10 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, for_ifa(indev) { switch (event) { case NETDEV_UP: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_UP); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); + netxen_list_config_ip(adapter, ifa, NX_IP_UP); break; case NETDEV_DOWN: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); + netxen_list_config_ip(adapter, ifa, NX_IP_DOWN); break; default: break; @@ -3231,23 +3242,78 @@ netxen_restore_indev_addr(struct net_device *netdev, unsigned long event) { struct netxen_adapter *adapter = netdev_priv(netdev); - struct nx_vlan_ip_list *pos, *tmp_pos; + struct nx_ip_list *pos, *tmp_pos; unsigned long ip_event; ip_event = (event == NETDEV_UP) ? NX_IP_UP : NX_IP_DOWN; netxen_config_indev_addr(adapter, netdev, event); - list_for_each_entry_safe(pos, tmp_pos, &adapter->vlan_ip_list, list) { + list_for_each_entry_safe(pos, tmp_pos, &adapter->ip_list, list) { netxen_config_ipaddr(adapter, pos->ip_addr, ip_event); } } +static inline bool +netxen_config_checkdev(struct net_device *dev) +{ + struct netxen_adapter *adapter; + + if (!is_netxen_netdev(dev)) + return false; + adapter = netdev_priv(dev); + if (!adapter) + return false; + if (!netxen_destip_supported(adapter)) + return false; + if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) + return false; + + return true; +} + +/** + * netxen_config_master - configure addresses based on master + * @dev: netxen device + * @event: netdev event + */ +static void netxen_config_master(struct net_device *dev, unsigned long event) +{ + struct net_device *master, *slave; + struct netxen_adapter *adapter = netdev_priv(dev); + + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + /* + * This is the case where the netxen nic is being + * enslaved and is dev_open()ed in bond_enslave() + * Now we should program the bond's (and its vlans') + * addresses in the netxen NIC. 
+ */ + if (master && netif_is_bond_master(master) && + !netif_is_bond_slave(dev)) { + netxen_config_indev_addr(adapter, master, event); + for_each_netdev_rcu(&init_net, slave) + if (slave->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(slave) == master) + netxen_config_indev_addr(adapter, slave, event); + } + rcu_read_unlock(); + /* + * This is the case where the netxen nic is being + * released and is dev_close()ed in bond_release() + * just before IFF_BONDING is stripped. + */ + if (!master && dev->priv_flags & IFF_BONDING) + netxen_free_ip_list(adapter, true); +} + static int netxen_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; struct net_device *dev = (struct net_device *)ptr; struct net_device *orig_dev = dev; + struct net_device *slave; recheck: if (dev == NULL) @@ -3257,19 +3323,28 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - netxen_config_indev_addr(adapter, orig_dev, event); + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_config_indev_addr(adapter, + orig_dev, event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + /* Act only if the actual netxen is the target */ + if (orig_dev == dev) + netxen_config_master(dev, event); + netxen_config_indev_addr(adapter, orig_dev, event); + } + } done: return NOTIFY_DONE; } @@ -3279,12 +3354,12 @@ netxen_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; - struct net_device *dev; - + struct net_device *dev, *slave; struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + unsigned long ip_event; dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; - + ip_event = (event == NETDEV_UP) ? 
NX_IP_UP : NX_IP_DOWN; recheck: if (dev == NULL) goto done; @@ -3293,31 +3368,24 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter || !netxen_destip_supported(adapter)) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - switch (event) { - case NETDEV_UP: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); - break; - case NETDEV_DOWN: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); - break; - default: - break; + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_list_config_ip(adapter, ifa, ip_event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + netxen_list_config_ip(adapter, ifa, ip_event); + } } - done: return NOTIFY_DONE; } @@ -3334,7 +3402,7 @@ static void netxen_restore_indev_addr(struct net_device *dev, unsigned long event) { } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { } #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e1ebeffa6b3..9fc1ab0c891 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1617,6 +1617,9 @@ extern seqcount_t devnet_rename_seq; /* Device rename seq */ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_in_bond_rcu(bond, slave) \ + for_each_netdev_rcu(&init_net, slave) \ + if (netdev_master_upper_dev_get_rcu(slave) == bond) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) static inline struct net_device *next_net_device(struct net_device *dev) @@ -2774,6 +2777,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline bool netif_is_bond_master(struct net_device *dev) +{ + return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; +} + static inline bool netif_is_bond_slave(struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; -- cgit v1.2.3-70-g09d2 From 764444f5a324ad5a272773f078192819084388ce Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Wed, 13 Mar 2013 16:57:25 +0000 Subject: net: clean leftover of COMPAT_NET_DEV_OPS removal COMPAT_NET_DEV_OPS was removed a while back and with it the definition of netdev_resync_ops() went away. Let's finish the clean-up. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9fc1ab0c891..56e3e066527 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1692,7 +1692,6 @@ extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int init_dummy_netdev(struct net_device *dev); -extern void netdev_resync_ops(struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); -- cgit v1.2.3-70-g09d2 From 7a875903389f3492d4cb06faa1d55a1630e77c11 Mon Sep 17 00:00:00 2001 From: Erwan Yvin Date: Mon, 11 Mar 2013 03:13:03 +0000 Subject: caif: remove caif_shm caif_shm is an old implementation caif_shm will be replaced by caif_virtio [ As explained by Linus Walleij: "U5500 used this, but was cancelled and the silicon did not reach anyone outside ST-Ericsson. Then for the next platforms, we have gone for the leaner & cleaner approach of using virtio, rpmesg and rproc." ] Signed-off-by: Erwan Yvin Acked-by: Linus Walleij Acked-by: Sjur Brendeland Signed-off-by: David S. Miller --- drivers/net/caif/Kconfig | 7 - drivers/net/caif/Makefile | 4 - drivers/net/caif/caif_shm_u5500.c | 128 ------- drivers/net/caif/caif_shmcore.c | 744 -------------------------------------- include/net/caif/caif_shm.h | 26 -- 5 files changed, 909 deletions(-) delete mode 100644 drivers/net/caif/caif_shm_u5500.c delete mode 100644 drivers/net/caif/caif_shmcore.c delete mode 100644 include/net/caif/caif_shm.h (limited to 'include') diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 60c2142373c..a966128c2a7 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -32,13 +32,6 @@ config CAIF_SPI_SYNC help to synchronize to the next transfer in case of over or under-runs. This option also needs to be enabled on the modem. -config CAIF_SHM - tristate "CAIF shared memory protocol driver" - depends on CAIF && U5500_MBOX - default n - ---help--- - The CAIF shared memory protocol driver for the STE UX5500 platform. 
- config CAIF_HSI tristate "CAIF HSI transport driver" depends on CAIF diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index 91dff861560..15a9d2fc753 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -7,9 +7,5 @@ obj-$(CONFIG_CAIF_TTY) += caif_serial.o cfspi_slave-objs := caif_spi.o caif_spi_slave.o obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o -# Shared memory -caif_shm-objs := caif_shmcore.o caif_shm_u5500.o -obj-$(CONFIG_CAIF_SHM) += caif_shm.o - # HSI interface obj-$(CONFIG_CAIF_HSI) += caif_hsi.o diff --git a/drivers/net/caif/caif_shm_u5500.c b/drivers/net/caif/caif_shm_u5500.c deleted file mode 100644 index 89d76b7b325..00000000000 --- a/drivers/net/caif/caif_shm_u5500.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CAIF Shared Memory protocol driver"); - -#define MAX_SHM_INSTANCES 1 - -enum { - MBX_ACC0, - MBX_ACC1, - MBX_DSP -}; - -static struct shmdev_layer shmdev_lyr[MAX_SHM_INSTANCES]; - -static unsigned int shm_start; -static unsigned int shm_size; - -module_param(shm_size, uint , 0440); -MODULE_PARM_DESC(shm_total_size, "Start of SHM shared memory"); - -module_param(shm_start, uint , 0440); -MODULE_PARM_DESC(shm_total_start, "Total Size of SHM shared memory"); - -static int shmdev_send_msg(u32 dev_id, u32 mbx_msg) -{ - /* Always block until msg is written successfully */ - mbox_send(shmdev_lyr[dev_id].hmbx, mbx_msg, true); - return 0; -} - -static int shmdev_mbx_setup(void *pshmdrv_cb, struct shmdev_layer *pshm_dev, - void *pshm_drv) -{ - /* - * For UX5500, we have only 1 SHM instance which uses MBX0 - * for communication with the peer modem - */ - pshm_dev->hmbx = mbox_setup(MBX_ACC0, pshmdrv_cb, pshm_drv); - - if (!pshm_dev->hmbx) - return -ENODEV; - else - return 0; -} - -static int __init caif_shmdev_init(void) -{ - int i, result; - - /* Loop is currently overkill, there is only one instance */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - - shmdev_lyr[i].shm_base_addr = shm_start; - shmdev_lyr[i].shm_total_sz = shm_size; - - if (((char *)shmdev_lyr[i].shm_base_addr == NULL) - || (shmdev_lyr[i].shm_total_sz <= 0)) { - pr_warn("ERROR," - "Shared memory Address and/or Size incorrect" - ", Bailing out ...\n"); - result = -EINVAL; - goto clean; - } - - pr_info("SHM AREA (instance %d) STARTS" - " AT %p\n", i, (char *)shmdev_lyr[i].shm_base_addr); - - shmdev_lyr[i].shm_id = i; - shmdev_lyr[i].pshmdev_mbxsend = shmdev_send_msg; - shmdev_lyr[i].pshmdev_mbxsetup = shmdev_mbx_setup; - - /* - * Finally, CAIF core module is called with details in place: - * 1. SHM base address - * 2. SHM size - * 3. MBX handle - */ - result = caif_shmcore_probe(&shmdev_lyr[i]); - if (result) { - pr_warn("ERROR[%d]," - "Could not probe SHM core (instance %d)" - " Bailing out ...\n", result, i); - goto clean; - } - } - - return 0; - -clean: - /* - * For now, we assume that even if one instance of SHM fails, we bail - * out of the driver support completely. For this, we need to release - * any memory allocated and unregister any instance of SHM net device. 
- */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - if (shmdev_lyr[i].pshm_netdev) - unregister_netdev(shmdev_lyr[i].pshm_netdev); - } - return result; -} - -static void __exit caif_shmdev_exit(void) -{ - int i; - - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - caif_shmcore_remove(shmdev_lyr[i].pshm_netdev); - kfree((void *)shmdev_lyr[i].shm_base_addr); - } - -} - -module_init(caif_shmdev_init); -module_exit(caif_shmdev_exit); diff --git a/drivers/net/caif/caif_shmcore.c b/drivers/net/caif/caif_shmcore.c deleted file mode 100644 index cca2afc945a..00000000000 --- a/drivers/net/caif/caif_shmcore.c +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Authors: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com, - * Daniel Martensson / daniel.martensson@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define NR_TX_BUF 6 -#define NR_RX_BUF 6 -#define TX_BUF_SZ 0x2000 -#define RX_BUF_SZ 0x2000 - -#define CAIF_NEEDED_HEADROOM 32 - -#define CAIF_FLOW_ON 1 -#define CAIF_FLOW_OFF 0 - -#define LOW_WATERMARK 3 -#define HIGH_WATERMARK 4 - -/* Maximum number of CAIF buffers per shared memory buffer. */ -#define SHM_MAX_FRMS_PER_BUF 10 - -/* - * Size in bytes of the descriptor area - * (With end of descriptor signalling) - */ -#define SHM_CAIF_DESC_SIZE ((SHM_MAX_FRMS_PER_BUF + 1) * \ - sizeof(struct shm_pck_desc)) - -/* - * Offset to the first CAIF frame within a shared memory buffer. - * Aligned on 32 bytes. - */ -#define SHM_CAIF_FRM_OFS (SHM_CAIF_DESC_SIZE + (SHM_CAIF_DESC_SIZE % 32)) - -/* Number of bytes for CAIF shared memory header. */ -#define SHM_HDR_LEN 1 - -/* Number of padding bytes for the complete CAIF frame. */ -#define SHM_FRM_PAD_LEN 4 - -#define CAIF_MAX_MTU 4096 - -#define SHM_SET_FULL(x) (((x+1) & 0x0F) << 0) -#define SHM_GET_FULL(x) (((x >> 0) & 0x0F) - 1) - -#define SHM_SET_EMPTY(x) (((x+1) & 0x0F) << 4) -#define SHM_GET_EMPTY(x) (((x >> 4) & 0x0F) - 1) - -#define SHM_FULL_MASK (0x0F << 0) -#define SHM_EMPTY_MASK (0x0F << 4) - -struct shm_pck_desc { - /* - * Offset from start of shared memory area to start of - * shared memory CAIF frame. - */ - u32 frm_ofs; - u32 frm_len; -}; - -struct buf_list { - unsigned char *desc_vptr; - u32 phy_addr; - u32 index; - u32 len; - u32 frames; - u32 frm_ofs; - struct list_head list; -}; - -struct shm_caif_frm { - /* Number of bytes of padding before the CAIF frame. 
*/ - u8 hdr_ofs; -}; - -struct shmdrv_layer { - /* caif_dev_common must always be first in the structure*/ - struct caif_dev_common cfdev; - - u32 shm_tx_addr; - u32 shm_rx_addr; - u32 shm_base_addr; - u32 tx_empty_available; - spinlock_t lock; - - struct list_head tx_empty_list; - struct list_head tx_pend_list; - struct list_head tx_full_list; - struct list_head rx_empty_list; - struct list_head rx_pend_list; - struct list_head rx_full_list; - - struct workqueue_struct *pshm_tx_workqueue; - struct workqueue_struct *pshm_rx_workqueue; - - struct work_struct shm_tx_work; - struct work_struct shm_rx_work; - - struct sk_buff_head sk_qhead; - struct shmdev_layer *pshm_dev; -}; - -static int shm_netdev_open(struct net_device *shm_netdev) -{ - netif_wake_queue(shm_netdev); - return 0; -} - -static int shm_netdev_close(struct net_device *shm_netdev) -{ - netif_stop_queue(shm_netdev); - return 0; -} - -int caif_shmdrv_rx_cb(u32 mbx_msg, void *priv) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv; - struct list_head *pos; - u32 avail_emptybuff = 0; - unsigned long flags = 0; - - pshm_drv = priv; - - /* Check for received buffers. */ - if (mbx_msg & SHM_FULL_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->rx_empty_list)) { - - /* Release spin lock. */ - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("No empty Rx buffers to fill: " - "mbx_msg:%x\n", mbx_msg); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_FULL(mbx_msg)) { - - /* We print even in IRQ context... */ - pr_warn( - "phyif_shm_mbx_msg_cb: RX full out of sync:" - " idx:%d, msg:%x SHM_GET_FULL(mbx_msg):%x\n", - idx, mbx_msg, SHM_GET_FULL(mbx_msg)); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_full_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Schedule RX work queue. */ - if (!work_pending(&pshm_drv->shm_rx_work)) - queue_work(pshm_drv->pshm_rx_workqueue, - &pshm_drv->shm_rx_work); - } - - /* Check for emptied buffers. */ - if (mbx_msg & SHM_EMPTY_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->tx_full_list)) { - - /* We print even in IRQ context... */ - pr_warn("No TX to empty: msg:%x\n", mbx_msg); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_EMPTY(mbx_msg)) { - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("TX empty " - "out of sync:idx:%d, msg:%x\n", idx, mbx_msg); - - /* Bail out. */ - goto err_sync; - } - list_del_init(&pbuf->list); - - /* Reset buffer parameters. */ - pbuf->frames = 0; - pbuf->frm_ofs = SHM_CAIF_FRM_OFS; - - list_add_tail(&pbuf->list, &pshm_drv->tx_empty_list); - - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - /* Check whether we have to wake up the transmitter. 
*/ - if ((avail_emptybuff > HIGH_WATERMARK) && - (!pshm_drv->tx_empty_available)) { - pshm_drv->tx_empty_available = 1; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_ON); - - - /* Schedule the work queue. if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, - &pshm_drv->shm_tx_work); - } else - spin_unlock_irqrestore(&pshm_drv->lock, flags); - } - - return 0; - -err_sync: - return -EIO; -} - -static void shm_rx_work_func(struct work_struct *rx_work) -{ - struct shmdrv_layer *pshm_drv; - struct buf_list *pbuf; - unsigned long flags = 0; - struct sk_buff *skb; - char *p; - int ret; - - pshm_drv = container_of(rx_work, struct shmdrv_layer, shm_rx_work); - - while (1) { - - struct shm_pck_desc *pck_desc; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for received buffers. */ - if (list_empty(&pshm_drv->rx_full_list)) { - spin_unlock_irqrestore(&pshm_drv->lock, flags); - break; - } - - pbuf = - list_entry(pshm_drv->rx_full_list.next, struct buf_list, - list); - list_del_init(&pbuf->list); - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Retrieve pointer to start of the packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) pbuf->desc_vptr; - - /* - * Check whether descriptor contains a CAIF shared memory - * frame. - */ - while (pck_desc->frm_ofs) { - unsigned int frm_buf_ofs; - unsigned int frm_pck_ofs; - unsigned int frm_pck_len; - /* - * Check whether offset is within buffer limits - * (lower). - */ - if (pck_desc->frm_ofs < - (pbuf->phy_addr - pshm_drv->shm_base_addr)) - break; - /* - * Check whether offset is within buffer limits - * (higher). - */ - if (pck_desc->frm_ofs > - ((pbuf->phy_addr - pshm_drv->shm_base_addr) + - pbuf->len)) - break; - - /* Calculate offset from start of buffer. */ - frm_buf_ofs = - pck_desc->frm_ofs - (pbuf->phy_addr - - pshm_drv->shm_base_addr); - - /* - * Calculate offset and length of CAIF packet while - * taking care of the shared memory header. - */ - frm_pck_ofs = - frm_buf_ofs + SHM_HDR_LEN + - (*(pbuf->desc_vptr + frm_buf_ofs)); - frm_pck_len = - (pck_desc->frm_len - SHM_HDR_LEN - - (*(pbuf->desc_vptr + frm_buf_ofs))); - - /* Check whether CAIF packet is within buffer limits */ - if ((frm_pck_ofs + pck_desc->frm_len) > pbuf->len) - break; - - /* Get a suitable CAIF packet and copy in data. */ - skb = netdev_alloc_skb(pshm_drv->pshm_dev->pshm_netdev, - frm_pck_len + 1); - if (skb == NULL) - break; - - p = skb_put(skb, frm_pck_len); - memcpy(p, pbuf->desc_vptr + frm_pck_ofs, frm_pck_len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = pshm_drv->pshm_dev->pshm_netdev; - - /* Push received packet up the stack. */ - ret = netif_rx_ni(skb); - - if (!ret) { - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_bytes += pck_desc->frm_len; - } else - ++pshm_drv->pshm_dev->pshm_netdev->stats. - rx_dropped; - /* Move to next packet descriptor. */ - pck_desc++; - } - - spin_lock_irqsave(&pshm_drv->lock, flags); - list_add_tail(&pbuf->list, &pshm_drv->rx_pend_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - } - - /* Schedule the work queue. 
if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - -} - -static void shm_tx_work_func(struct work_struct *tx_work) -{ - u32 mbox_msg; - unsigned int frmlen, avail_emptybuff, append = 0; - unsigned long flags = 0; - struct buf_list *pbuf = NULL; - struct shmdrv_layer *pshm_drv; - struct shm_caif_frm *frm; - struct sk_buff *skb; - struct shm_pck_desc *pck_desc; - struct list_head *pos; - - pshm_drv = container_of(tx_work, struct shmdrv_layer, shm_tx_work); - - do { - /* Initialize mailbox message. */ - mbox_msg = 0x00; - avail_emptybuff = 0; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for pending receive buffers. */ - if (!list_empty(&pshm_drv->rx_pend_list)) { - - pbuf = list_entry(pshm_drv->rx_pend_list.next, - struct buf_list, list); - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_empty_list); - /* - * Value index is never changed, - * so read access should be safe. - */ - mbox_msg |= SHM_SET_EMPTY(pbuf->index); - } - - skb = skb_peek(&pshm_drv->sk_qhead); - - if (skb == NULL) - goto send_msg; - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - if ((avail_emptybuff < LOW_WATERMARK) && - pshm_drv->tx_empty_available) { - /* Update blocking condition. */ - pshm_drv->tx_empty_available = 0; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_OFF); - spin_lock_irqsave(&pshm_drv->lock, flags); - } - /* - * We simply return back to the caller if we do not have space - * either in Tx pending list or Tx empty list. In this case, - * we hold the received skb in the skb list, waiting to - * be transmitted once Tx buffers become available - */ - if (list_empty(&pshm_drv->tx_empty_list)) - goto send_msg; - - /* Get the first free Tx buffer. */ - pbuf = list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - do { - if (append) { - skb = skb_peek(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - } - - frm = (struct shm_caif_frm *) - (pbuf->desc_vptr + pbuf->frm_ofs); - - frm->hdr_ofs = 0; - frmlen = 0; - frmlen += SHM_HDR_LEN + frm->hdr_ofs + skb->len; - - /* Add tail padding if needed. */ - if (frmlen % SHM_FRM_PAD_LEN) - frmlen += SHM_FRM_PAD_LEN - - (frmlen % SHM_FRM_PAD_LEN); - - /* - * Verify that packet, header and additional padding - * can fit within the buffer frame area. - */ - if (frmlen >= (pbuf->len - pbuf->frm_ofs)) - break; - - if (!append) { - list_del_init(&pbuf->list); - append = 1; - } - - skb = skb_dequeue(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pbuf->desc_vptr + - pbuf->frm_ofs + SHM_HDR_LEN + - frm->hdr_ofs, skb->len); - - pshm_drv->pshm_dev->pshm_netdev->stats.tx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats.tx_bytes += - frmlen; - dev_kfree_skb_irq(skb); - - /* Fill in the shared memory packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) (pbuf->desc_vptr); - /* Forward to current frame. */ - pck_desc += pbuf->frames; - pck_desc->frm_ofs = (pbuf->phy_addr - - pshm_drv->shm_base_addr) + - pbuf->frm_ofs; - pck_desc->frm_len = frmlen; - /* Terminate packet descriptor area. */ - pck_desc++; - pck_desc->frm_ofs = 0; - /* Update buffer parameters. */ - pbuf->frames++; - pbuf->frm_ofs += frmlen + (frmlen % 32); - - } while (pbuf->frames < SHM_MAX_FRMS_PER_BUF); - - /* Assign buffer as full. 
*/ - list_add_tail(&pbuf->list, &pshm_drv->tx_full_list); - append = 0; - mbox_msg |= SHM_SET_FULL(pbuf->index); -send_msg: - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - if (mbox_msg) - pshm_drv->pshm_dev->pshmdev_mbxsend - (pshm_drv->pshm_dev->shm_id, mbox_msg); - } while (mbox_msg); -} - -static int shm_netdev_tx(struct sk_buff *skb, struct net_device *shm_netdev) -{ - struct shmdrv_layer *pshm_drv; - - pshm_drv = netdev_priv(shm_netdev); - - skb_queue_tail(&pshm_drv->sk_qhead, skb); - - /* Schedule Tx work queue. for deferred processing of skbs*/ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - - return 0; -} - -static const struct net_device_ops netdev_ops = { - .ndo_open = shm_netdev_open, - .ndo_stop = shm_netdev_close, - .ndo_start_xmit = shm_netdev_tx, -}; - -static void shm_netdev_setup(struct net_device *pshm_netdev) -{ - struct shmdrv_layer *pshm_drv; - pshm_netdev->netdev_ops = &netdev_ops; - - pshm_netdev->mtu = CAIF_MAX_MTU; - pshm_netdev->type = ARPHRD_CAIF; - pshm_netdev->hard_header_len = CAIF_NEEDED_HEADROOM; - pshm_netdev->tx_queue_len = 0; - pshm_netdev->destructor = free_netdev; - - pshm_drv = netdev_priv(pshm_netdev); - - /* Initialize structures in a clean state. */ - memset(pshm_drv, 0, sizeof(struct shmdrv_layer)); - - pshm_drv->cfdev.link_select = CAIF_LINK_LOW_LATENCY; -} - -int caif_shmcore_probe(struct shmdev_layer *pshm_dev) -{ - int result, j; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_dev->pshm_netdev = alloc_netdev(sizeof(struct shmdrv_layer), - "cfshm%d", shm_netdev_setup); - if (!pshm_dev->pshm_netdev) - return -ENOMEM; - - pshm_drv = netdev_priv(pshm_dev->pshm_netdev); - pshm_drv->pshm_dev = pshm_dev; - - /* - * Initialization starts with the verification of the - * availability of MBX driver by calling its setup function. - * MBX driver must be available by this time for proper - * functioning of SHM driver. - */ - if ((pshm_dev->pshmdev_mbxsetup - (caif_shmdrv_rx_cb, pshm_dev, pshm_drv)) != 0) { - pr_warn("Could not config. SHM Mailbox," - " Bailing out.....\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENODEV; - } - - skb_queue_head_init(&pshm_drv->sk_qhead); - - pr_info("SHM DEVICE[%d] PROBED BY DRIVER, NEW SHM DRIVER" - " INSTANCE AT pshm_drv =0x%p\n", - pshm_drv->pshm_dev->shm_id, pshm_drv); - - if (pshm_dev->shm_total_sz < - (NR_TX_BUF * TX_BUF_SZ + NR_RX_BUF * RX_BUF_SZ)) { - - pr_warn("ERROR, Amount of available" - " Phys. 
SHM cannot accommodate current SHM " - "driver configuration, Bailing out ...\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - - pshm_drv->shm_base_addr = pshm_dev->shm_base_addr; - pshm_drv->shm_tx_addr = pshm_drv->shm_base_addr; - - if (pshm_dev->shm_loopback) - pshm_drv->shm_rx_addr = pshm_drv->shm_tx_addr; - else - pshm_drv->shm_rx_addr = pshm_dev->shm_base_addr + - (NR_TX_BUF * TX_BUF_SZ); - - spin_lock_init(&pshm_drv->lock); - INIT_LIST_HEAD(&pshm_drv->tx_empty_list); - INIT_LIST_HEAD(&pshm_drv->tx_pend_list); - INIT_LIST_HEAD(&pshm_drv->tx_full_list); - - INIT_LIST_HEAD(&pshm_drv->rx_empty_list); - INIT_LIST_HEAD(&pshm_drv->rx_pend_list); - INIT_LIST_HEAD(&pshm_drv->rx_full_list); - - INIT_WORK(&pshm_drv->shm_tx_work, shm_tx_work_func); - INIT_WORK(&pshm_drv->shm_rx_work, shm_rx_work_func); - - pshm_drv->pshm_tx_workqueue = - create_singlethread_workqueue("shm_tx_work"); - pshm_drv->pshm_rx_workqueue = - create_singlethread_workqueue("shm_rx_work"); - - for (j = 0; j < NR_TX_BUF; j++) { - struct buf_list *tx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (tx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - tx_buf->index = j; - tx_buf->phy_addr = pshm_drv->shm_tx_addr + (TX_BUF_SZ * j); - tx_buf->len = TX_BUF_SZ; - tx_buf->frames = 0; - tx_buf->frm_ofs = SHM_CAIF_FRM_OFS; - - if (pshm_dev->shm_loopback) - tx_buf->desc_vptr = (unsigned char *)tx_buf->phy_addr; - else - /* - * FIXME: the result of ioremap is not a pointer - arnd - */ - tx_buf->desc_vptr = - ioremap(tx_buf->phy_addr, TX_BUF_SZ); - - list_add_tail(&tx_buf->list, &pshm_drv->tx_empty_list); - } - - for (j = 0; j < NR_RX_BUF; j++) { - struct buf_list *rx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (rx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - rx_buf->index = j; - rx_buf->phy_addr = pshm_drv->shm_rx_addr + (RX_BUF_SZ * j); - rx_buf->len = RX_BUF_SZ; - - if (pshm_dev->shm_loopback) - rx_buf->desc_vptr = (unsigned char *)rx_buf->phy_addr; - else - rx_buf->desc_vptr = - ioremap(rx_buf->phy_addr, RX_BUF_SZ); - list_add_tail(&rx_buf->list, &pshm_drv->rx_empty_list); - } - - pshm_drv->tx_empty_available = 1; - result = register_netdev(pshm_dev->pshm_netdev); - if (result) - pr_warn("ERROR[%d], SHM could not, " - "register with NW FRMWK Bailing out ...\n", result); - - return result; -} - -void caif_shmcore_remove(struct net_device *pshm_netdev) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_drv = netdev_priv(pshm_netdev); - - while (!(list_empty(&pshm_drv->tx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_empty_list))) { - pbuf = - list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_empty_list))) { - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct 
buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - /* Destroy work queues. */ - destroy_workqueue(pshm_drv->pshm_tx_workqueue); - destroy_workqueue(pshm_drv->pshm_rx_workqueue); - - unregister_netdev(pshm_netdev); -} diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h deleted file mode 100644 index 5bcce55438c..00000000000 --- a/include/net/caif/caif_shm.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#ifndef CAIF_SHM_H_ -#define CAIF_SHM_H_ - -struct shmdev_layer { - u32 shm_base_addr; - u32 shm_total_sz; - u32 shm_id; - u32 shm_loopback; - void *hmbx; - int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); - int (*pshmdev_mbxsetup) (void *pshmdrv_cb, - struct shmdev_layer *pshm_dev, void *pshm_drv); - struct net_device *pshm_netdev; -}; - -extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); -extern void caif_shmcore_remove(struct net_device *pshm_netdev); - -#endif -- cgit v1.2.3-70-g09d2 From 6681712d67eef14c4ce793561c3231659153a320 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 15 Mar 2013 04:35:51 +0000 Subject: vxlan: generalize forwarding tables This patch generalizes VXLAN forwarding table entries allowing an administrator to: 1) specify multiple destinations for a given MAC 2) specify alternate vni's in the VXLAN header 3) specify alternate destination UDP ports 4) use multicast MAC addresses as fdb lookup keys 5) specify multicast destinations 6) specify the outgoing interface for forwarded packets The combination allows configuration of more complex topologies using VXLAN encapsulation. Changes since v1: rebase to 3.9.0-rc2 Signed-Off-By: David L Stevens Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 263 ++++++++++++++++++++++++++++++++--------- include/uapi/linux/neighbour.h | 3 + net/core/rtnetlink.c | 2 +- 3 files changed, 210 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index db0df07c18d..33427fd6251 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -81,13 +81,22 @@ struct vxlan_net { struct hlist_head vni_list[VNI_HASH_SIZE]; }; +struct vxlan_rdst { + struct rcu_head rcu; + __be32 remote_ip; + __be16 remote_port; + u32 remote_vni; + u32 remote_ifindex; + struct vxlan_rdst *remote_next; +}; + /* Forwarding table entry */ struct vxlan_fdb { struct hlist_node hlist; /* linked list of entries */ struct rcu_head rcu; unsigned long updated; /* jiffies */ unsigned long used; - __be32 remote_ip; + struct vxlan_rdst remote; u16 state; /* see ndm_state */ u8 eth_addr[ETH_ALEN]; }; @@ -157,7 +166,8 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) /* Fill in neighbour message in skbuff. 
*/ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, const struct vxlan_fdb *fdb, - u32 portid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags, + const struct vxlan_rdst *rdst) { unsigned long now = jiffies; struct nda_cacheinfo ci; @@ -176,7 +186,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (type == RTM_GETNEIGH) { ndm->ndm_family = AF_INET; - send_ip = fdb->remote_ip != 0; + send_ip = rdst->remote_ip != htonl(INADDR_ANY); send_eth = !is_zero_ether_addr(fdb->eth_addr); } else ndm->ndm_family = AF_BRIDGE; @@ -188,7 +198,17 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; - if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip)) + if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) + goto nla_put_failure; + + if (rdst->remote_port && rdst->remote_port != vxlan_port && + nla_put_be16(skb, NDA_PORT, rdst->remote_port)) + goto nla_put_failure; + if (rdst->remote_vni != vxlan->vni && + nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) + goto nla_put_failure; + if (rdst->remote_ifindex && + nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); @@ -211,6 +231,9 @@ static inline size_t vxlan_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + nla_total_size(sizeof(__be32)) /* NDA_DST */ + + nla_total_size(sizeof(__be32)) /* NDA_PORT */ + + nla_total_size(sizeof(__be32)) /* NDA_VNI */ + + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -225,7 +248,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan, if (skb == NULL) goto errout; - err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0); + err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote); if (err < 0) { /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -247,7 +270,8 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) memset(&f, 0, sizeof f); f.state = NUD_STALE; - f.remote_ip = ipa; /* goes to NDA_DST */ + f.remote.remote_ip = ipa; /* goes to NDA_DST */ + f.remote.remote_vni = VXLAN_N_VID; vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); } @@ -300,10 +324,38 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, return NULL; } +/* Add/update destinations for multicast */ +static int vxlan_fdb_append(struct vxlan_fdb *f, + __be32 ip, __u32 port, __u32 vni, __u32 ifindex) +{ + struct vxlan_rdst *rd_prev, *rd; + + rd_prev = NULL; + for (rd = &f->remote; rd; rd = rd->remote_next) { + if (rd->remote_ip == ip && + rd->remote_port == port && + rd->remote_vni == vni && + rd->remote_ifindex == ifindex) + return 0; + rd_prev = rd; + } + rd = kmalloc(sizeof(*rd), GFP_ATOMIC); + if (rd == NULL) + return -ENOBUFS; + rd->remote_ip = ip; + rd->remote_port = port; + rd->remote_vni = vni; + rd->remote_ifindex = ifindex; + rd->remote_next = NULL; + rd_prev->remote_next = rd; + return 1; +} + /* Add new entry to forwarding table -- assumes lock held */ static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, __be32 ip, - __u16 state, __u16 flags) + __u16 state, __u16 flags, + __u32 port, __u32 vni, __u32 ifindex) { struct vxlan_fdb *f; int notify = 0; @@ -320,6 +372,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, f->updated = jiffies; notify = 1; } + if ((flags & NLM_F_APPEND) && + 
is_multicast_ether_addr(f->eth_addr)) { + int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); + + if (rc < 0) + return rc; + notify |= rc; + } } else { if (!(flags & NLM_F_CREATE)) return -ENOENT; @@ -333,7 +393,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return -ENOMEM; notify = 1; - f->remote_ip = ip; + f->remote.remote_ip = ip; + f->remote.remote_port = port; + f->remote.remote_vni = vni; + f->remote.remote_ifindex = ifindex; + f->remote.remote_next = NULL; f->state = state; f->updated = f->used = jiffies; memcpy(f->eth_addr, mac, ETH_ALEN); @@ -349,6 +413,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return 0; } +void vxlan_fdb_free(struct rcu_head *head) +{ + struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); + + while (f->remote.remote_next) { + struct vxlan_rdst *rd = f->remote.remote_next; + + f->remote.remote_next = rd->remote_next; + kfree(rd); + } + kfree(f); +} + static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) { netdev_dbg(vxlan->dev, @@ -358,7 +435,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); - kfree_rcu(f, rcu); + call_rcu(&f->rcu, vxlan_fdb_free); } /* Add static entry (via netlink) */ @@ -367,7 +444,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 flags) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct net *net = dev_net(vxlan->dev); __be32 ip; + u32 port, vni, ifindex; int err; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@ -384,8 +463,36 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], ip = nla_get_be32(tb[NDA_DST]); + if (tb[NDA_PORT]) { + if (nla_len(tb[NDA_PORT]) != sizeof(u32)) + return -EINVAL; + port = nla_get_u32(tb[NDA_PORT]); + } else + port = vxlan_port; + + if (tb[NDA_VNI]) { + if (nla_len(tb[NDA_VNI]) != sizeof(u32)) + return -EINVAL; + vni = nla_get_u32(tb[NDA_VNI]); + } else + vni = vxlan->vni; + + if (tb[NDA_IFINDEX]) { + struct net_device *dev; + + if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) + return -EINVAL; + ifindex = nla_get_u32(tb[NDA_IFINDEX]); + dev = dev_get_by_index(net, ifindex); + if (!dev) + return -EADDRNOTAVAIL; + dev_put(dev); + } else + ifindex = 0; + spin_lock_bh(&vxlan->hash_lock); - err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags); + err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port, + vni, ifindex); spin_unlock_bh(&vxlan->hash_lock); return err; @@ -423,18 +530,21 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { - if (idx < cb->args[0]) - goto skip; - - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI); - if (err < 0) - break; + struct vxlan_rdst *rd; + for (rd = &f->remote; rd; rd = rd->remote_next) { + if (idx < cb->args[0]) + goto skip; + + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, rd); + if (err < 0) + break; skip: - ++idx; + ++idx; + } } } @@ -454,22 +564,23 @@ static void vxlan_snoop(struct net_device *dev, f = vxlan_find_mac(vxlan, src_mac); if (likely(f)) { f->used = jiffies; - if (likely(f->remote_ip == src_ip)) + if (likely(f->remote.remote_ip == src_ip)) return; if (net_ratelimit()) netdev_info(dev, "%pM migrated from %pI4 to %pI4\n", - src_mac, &f->remote_ip, &src_ip); + src_mac, 
&f->remote.remote_ip, &src_ip); - f->remote_ip = src_ip; + f->remote.remote_ip = src_ip; f->updated = jiffies; } else { /* learned new entry */ spin_lock(&vxlan->hash_lock); err = vxlan_fdb_create(vxlan, src_mac, src_ip, NUD_REACHABLE, - NLM_F_EXCL|NLM_F_CREATE); + NLM_F_EXCL|NLM_F_CREATE, + vxlan_port, vxlan->vni, 0); spin_unlock(&vxlan->hash_lock); } } @@ -701,7 +812,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) } f = vxlan_find_mac(vxlan, n->ha); - if (f && f->remote_ip == 0) { + if (f && f->remote.remote_ip == htonl(INADDR_ANY)) { /* bridge-local neighbor */ neigh_release(n); goto out; @@ -834,47 +945,26 @@ static int handle_offloads(struct sk_buff *skb) return 0; } -/* Transmit local packets over Vxlan - * - * Outer IP header inherits ECN and DF from inner header. - * Outer UDP destination is the VXLAN assigned port. - * source port is based on hash of flow - */ -static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, + struct vxlan_rdst *rdst, bool did_rsc) { struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; - struct ethhdr *eth; struct iphdr *iph; struct vxlanhdr *vxh; struct udphdr *uh; struct flowi4 fl4; unsigned int pkt_len = skb->len; __be32 dst; - __u16 src_port; + __u16 src_port, dst_port; + u32 vni; __be16 df = 0; __u8 tos, ttl; - bool did_rsc = false; - const struct vxlan_fdb *f; - - skb_reset_mac_header(skb); - eth = eth_hdr(skb); - - if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) - return arp_reduce(dev, skb); - else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) - did_rsc = route_shortcircuit(dev, skb); - f = vxlan_find_mac(vxlan, eth->h_dest); - if (f == NULL) { - did_rsc = false; - dst = vxlan->gaddr; - if (!dst && (vxlan->flags & VXLAN_F_L2MISS) && - !is_multicast_ether_addr(eth->h_dest)) - vxlan_fdb_miss(vxlan, eth->h_dest); - } else - dst = f->remote_ip; + dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port; + vni = rdst->remote_vni; + dst = rdst->remote_ip; if (!dst) { if (did_rsc) { @@ -922,7 +1012,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) src_port = vxlan_src_port(vxlan, skb); memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = vxlan->link; + fl4.flowi4_oif = rdst->remote_ifindex; fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = dst; fl4.saddr = vxlan->saddr; @@ -949,13 +1039,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); - vxh->vx_vni = htonl(vxlan->vni << 8); + vxh->vx_vni = htonl(vni << 8); __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); - uh->dest = htons(vxlan_port); + uh->dest = htons(dst_port); uh->source = htons(src_port); uh->len = htons(skb->len); @@ -995,6 +1085,64 @@ tx_free: return NETDEV_TX_OK; } +/* Transmit local packets over Vxlan + * + * Outer IP header inherits ECN and DF from inner header. + * Outer UDP destination is the VXLAN assigned port. 
+ * source port is based on hash of flow + */ +static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct ethhdr *eth; + bool did_rsc = false; + struct vxlan_rdst group, *rdst0, *rdst; + struct vxlan_fdb *f; + int rc1, rc; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) + return arp_reduce(dev, skb); + else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) + did_rsc = route_shortcircuit(dev, skb); + + f = vxlan_find_mac(vxlan, eth->h_dest); + if (f == NULL) { + did_rsc = false; + group.remote_port = vxlan_port; + group.remote_vni = vxlan->vni; + group.remote_ip = vxlan->gaddr; + group.remote_ifindex = vxlan->link; + group.remote_next = 0; + rdst0 = &group; + + if (group.remote_ip == htonl(INADDR_ANY) && + (vxlan->flags & VXLAN_F_L2MISS) && + !is_multicast_ether_addr(eth->h_dest)) + vxlan_fdb_miss(vxlan, eth->h_dest); + } else + rdst0 = &f->remote; + + rc = NETDEV_TX_OK; + + /* if there are multiple destinations, send copies */ + for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) { + struct sk_buff *skb1; + + skb1 = skb_clone(skb, GFP_ATOMIC); + rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); + if (rc == NETDEV_TX_OK) + rc = rc1; + } + + rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); + if (rc == NETDEV_TX_OK) + rc = rc1; + return rc; +} + /* Walk the forwarding table and purge stale entries */ static void vxlan_cleanup(unsigned long arg) { @@ -1558,6 +1706,7 @@ static void __exit vxlan_cleanup_module(void) { rtnl_link_unregister(&vxlan_link_ops); unregister_pernet_device(&vxlan_net_ops); + rcu_barrier(); } module_exit(vxlan_cleanup_module); diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index adb068c53c4..f175212420a 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -21,6 +21,9 @@ enum { NDA_CACHEINFO, NDA_PROBES, NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, __NDA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 55b5624a4b0..0e86baf8f80 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2112,7 +2112,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } addr = nla_data(tb[NDA_LLADDR]); - if (!is_valid_ether_addr(addr)) { + if (is_zero_ether_addr(addr)) { pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From a362db3d6c8a952cbde510b1fa35d0ee001b347e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Mar 2013 04:47:55 +0000 Subject: net: fix some typos in netif features Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f5e797c0c2a..d6ee2d008ee 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -102,8 +102,8 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) -#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) -#define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL) +#define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -- cgit v1.2.3-70-g09d2 From 1a2c6181c4a1922021b4d7df373bba612c3e5f04 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 17 Mar 2013 08:23:34 +0000 Subject: tcp: Remove TCPCT TCPCT uses option-number 253, reserved for experimental use and should not be used in production environments. Further, TCPCT does not fully implement RFC 6013. As a nice side-effect, removing TCPCT increases TCP's performance for very short flows: Doing an apache-benchmark with -c 100 -n 100000, sending HTTP-requests for files of 1KB size. before this patch: average (among 7 runs) of 20845.5 Requests/Second after: average (among 7 runs) of 21403.6 Requests/Second Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 - drivers/infiniband/hw/cxgb4/cm.c | 2 +- include/linux/tcp.h | 10 -- include/net/request_sock.h | 8 +- include/net/tcp.h | 89 +---------- include/uapi/linux/tcp.h | 26 ---- net/dccp/ipv4.c | 5 +- net/dccp/ipv6.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/syncookies.c | 3 +- net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp.c | 267 --------------------------------- net/ipv4/tcp_input.c | 69 +-------- net/ipv4/tcp_ipv4.c | 60 +------- net/ipv4/tcp_minisocks.c | 40 +---- net/ipv4/tcp_output.c | 219 +-------------------------- net/ipv6/syncookies.c | 3 +- net/ipv6/tcp_ipv6.c | 56 +------ 18 files changed, 38 insertions(+), 841 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 18a24c405ac..17953e2bc3e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -175,14 +175,6 @@ tcp_congestion_control - STRING is inherited. [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ] -tcp_cookie_size - INTEGER - Default size of TCP Cookie Transactions (TCPCT) option, that may be - overridden on a per socket basis by the TCPCT socket option. - Values greater than the maximum (16) are interpreted as the maximum. - Values greater than zero and less than the minimum (8) are interpreted - as the minimum. Odd values are interpreted as the next even value. - Default: 0 (off). - tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8dcc84fd9d3..54fd31fcc33 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2915,7 +2915,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 763c108ee03..ed6a7456eec 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -90,9 +90,6 @@ struct tcp_options_received { sack_ok : 4, /* SACK seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 cookie_plus:6, /* bytes in authenticator/cookie option */ - cookie_out_never:1, - cookie_in_always:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -102,7 +99,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; - rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -320,12 +316,6 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif - /* When the cookie options are generated and exchanged, then this - * object holds a reference to them (cookie_values->kref). Also - * contains related tcp_cookie_transactions fields. - */ - struct tcp_cookie_values *cookie_values; - /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a51dbd17c2d..9069e65c1c5 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -27,19 +27,13 @@ struct sk_buff; struct dst_entry; struct proto; -/* empty to "strongly type" an otherwise void parameter. - */ -struct request_values { -}; - struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req, - struct request_values *rvp); + struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index ab9f947b118..7f2f17198d7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -179,7 +179,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ -#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. 
See draft-ietf-tcpm-experimental-options-00.txt @@ -454,7 +453,7 @@ extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx, const u8 **hvpp, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -476,7 +475,6 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); @@ -1589,91 +1587,6 @@ struct tcp_request_sock_ops { #endif }; -/* Using SHA1 for now, define some constants. - */ -#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) -#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) -#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) - -extern int tcp_cookie_generator(u32 *bakery); - -/** - * struct tcp_cookie_values - each socket needs extra space for the - * cookies, together with (optional) space for any SYN data. - * - * A tcp_sock contains a pointer to the current value, and this is - * cloned to the tcp_timewait_sock. - * - * @cookie_pair: variable data from the option exchange. - * - * @cookie_desired: user specified tcpct_cookie_desired. Zero - * indicates default (sysctl_tcp_cookie_size). - * After cookie sent, remembers size of cookie. - * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. - * - * @s_data_desired: user specified tcpct_s_data_desired. When the - * constant payload is specified (@s_data_constant), - * holds its length instead. - * Range 0 to TCP_MSS_DESIRED. - * - * @s_data_payload: constant data that is to be included in the - * payload of SYN or SYNACK segments when the - * cookie option is present. - */ -struct tcp_cookie_values { - struct kref kref; - u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; - u8 cookie_pair_size; - u8 cookie_desired; - u16 s_data_desired:11, - s_data_constant:1, - s_data_in:1, - s_data_out:1, - s_data_unused:2; - u8 s_data_payload[0]; -}; - -static inline void tcp_cookie_values_release(struct kref *kref) -{ - kfree(container_of(kref, struct tcp_cookie_values, kref)); -} - -/* The length of constant payload data. Note that s_data_desired is - * overloaded, depending on s_data_constant: either the length of constant - * data (returned here) or the limit on variable data. - */ -static inline int tcp_s_data_size(const struct tcp_sock *tp) -{ - return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) - ? tp->cookie_values->s_data_desired - : 0; -} - -/** - * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. - * - * As tcp_request_sock has already been extended in other places, the - * only remaining method is to pass stack values along as function - * parameters. These parameters are not needed after sending SYNACK. - * - * @cookie_bakery: cryptographic secret and message workspace. - * - * @cookie_plus: bytes in authenticator/cookie option, copied from - * struct tcp_options_received (above). 
- */ -struct tcp_extend_values { - struct request_values rv; - u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; - u8 cookie_plus:6, - cookie_out_never:1, - cookie_in_always:1; -}; - -static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) -{ - return (struct tcp_extend_values *)rvp; -} - extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6b1ead0b0c9..8d776ebc482 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -102,7 +102,6 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ @@ -199,29 +198,4 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; -/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */ -#define TCP_COOKIE_MIN 8 /* 64-bits */ -#define TCP_COOKIE_MAX 16 /* 128-bits */ -#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) - -/* Flags for both getsockopt and setsockopt */ -#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ -#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, - * supercedes everything. */ - -/* Flags for getsockopt */ -#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ -#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ - -/* TCP_COOKIE_TRANSACTIONS data */ -struct tcp_cookie_transactions { - __u16 tcpct_flags; /* see above */ - __u8 __tcpct_pad1; /* zero */ - __u8 tcpct_cookie_desired; /* bytes */ - __u16 tcpct_s_data_desired; /* bytes of variable data */ - __u16 tcpct_used; /* bytes in value */ - __u8 tcpct_value[TCP_MSS_DEFAULT]; -}; - - #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f..ebc54fef85a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271..9c61f9c02fd 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git 
a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 786d97aee75..6acb541c909 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11..7f4a5cb8f8d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cca4550f408..cb45062c8be 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -732,13 +732,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8d14573ade7..17a6810af5c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. 
- */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? - TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? - TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3409,134 +3279,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. (tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. 
The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. - */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). 
- */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3591,7 +3333,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3667,13 +3408,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 836d74dd018..19f0149fb6a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3760,8 +3760,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3845,31 +3845,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. - */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3915,8 +3890,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3930,7 +3904,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5311,12 +5285,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. 
*/ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5670,12 +5642,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5706,14 +5677,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5810,30 +5779,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. 
- */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7ab868c828..b27c758ca23 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? 
NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -2241,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4bdb09fca40..8f0234f8bb9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; @@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. 
- * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -460,8 +432,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -538,7 +509,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -548,7 +518,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -648,7 +618,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e7742f0b5d..ac5871ebe08 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -400,36 +396,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. 
- */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. - * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. 
- * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. - */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the options fit, the same options should fit now! - */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. 
*/ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. 
No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b60b3..d5dda20bd71 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df..0a97add2ab7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -454,7 +454,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -466,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -481,13 +480,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -940,9 +938,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -980,50 +976,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - 
want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1101,7 +1054,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; -- cgit v1.2.3-70-g09d2 From 2908fe31cf6b8d3a975efb567347f85e724f4e81 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:06:56 -0500 Subject: Bluetooth: Remove useless HCI_PENDING_CLASS flag Now that class related operations are tracked through asynchronous HCI requests this flag is no longer needed. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_event.c | 3 +-- net/bluetooth/mgmt.c | 4 ---- 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 7f12c25f1fc..1e723c76a8f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -119,7 +119,6 @@ enum { HCI_CONNECTABLE, HCI_DISCOVERABLE, HCI_LINK_SECURITY, - HCI_PENDING_CLASS, HCI_PERIODIC_INQ, }; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d11b87bc1d1..5f2d008f335 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -194,8 +194,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | - BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)); hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8a0bbb914be..3e3cb0102b1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -652,8 +652,6 @@ static void update_class(struct hci_request *req) return; hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); - - set_bit(HCI_PENDING_CLASS, &hdev->dev_flags); } static void service_cache_off(struct work_struct *work) @@ -3762,8 +3760,6 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; int err = 0; - clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); -- cgit v1.2.3-70-g09d2 From 2cc6fb0049bc02ca7a020ba7b4f88b4c35976058 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:06:57 -0500 Subject: Bluetooth: Add a define for the HCI persistent flags mask We'll need to use this mask also when powering off the HCI device so it's better to have this in a single and visible place. 
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 5 +++++ net/bluetooth/hci_event.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1e723c76a8f..1e40222e9dd 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -122,6 +122,11 @@ enum { HCI_PERIODIC_INQ, }; +/* A mask for the flags that are supposed to remain when a reset happens + * or the HCI device is closed. + */ +#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)) + /* HCI ioctl defines */ #define HCIDEVUP _IOW('H', 201, int) #define HCIDEVDOWN _IOW('H', 202, int) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5f2d008f335..ed4ecd930a7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -194,7 +194,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); /* Reset all non-persistent flags */ - hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)); + hdev->dev_flags &= ~HCI_PERSISTENT_MASK; hdev->discovery.state = DISCOVERY_STOPPED; hdev->inq_tx_power = HCI_TX_POWER_INVALID; -- cgit v1.2.3-70-g09d2 From 04b4edcbc9049e100681c0149b572de439be42ab Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:01 -0500 Subject: Bluetooth: Handle AD updating through an async request For proper control of the AD update and the related HCI commands it's best to run the AD update through an async request instead of a standalone HCI command. This patch changes the hci_update_ad() function to take a request pointer and updates its users appropriately. E.g. the function is no longer called after the init sequence but during stage 3 of the init sequence. The TX power is read during the init sequence, so we don't need an explicit update whenever it is read and the AD update based on the local name should be done through the local name mgmt handler. The only other user is the update based on enabling advertising. This part is still kept as there is no mgmt API to enable it. 
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 4 ++-- net/bluetooth/hci_core.c | 29 ++++++++++------------------- net/bluetooth/hci_event.c | 19 +++++++++---------- 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d6c32561fc0..cb99193c749 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -740,8 +740,6 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash, u8 *randomizer); int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr); -int hci_update_ad(struct hci_dev *hdev); - void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_frame(struct sk_buff *skb); @@ -1167,6 +1165,8 @@ struct hci_sec_filter { #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) +void hci_update_ad(struct hci_request *req); + void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9e87a91562a..0ffd3587117 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -492,8 +492,10 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev)) { hci_set_le_support(req); + hci_update_ad(req); + } } static int __hci_init(struct hci_dev *hdev) @@ -936,39 +938,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr) return ad_len; } -int hci_update_ad(struct hci_dev *hdev) +void hci_update_ad(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; u8 len; - int err; - - hci_dev_lock(hdev); - if (!lmp_le_capable(hdev)) { - err = -EINVAL; - goto unlock; - } + if (!lmp_le_capable(hdev)) + return; memset(&cp, 0, sizeof(cp)); len = create_ad(hdev, cp.data); if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) { - err = 0; - goto unlock; - } + memcmp(cp.data, hdev->adv_data, len) == 0) + return; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; - err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -unlock: - hci_dev_unlock(hdev); - - return err; + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } /* ---- HCI ioctl helpers ---- */ @@ -1025,7 +1017,6 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - hci_update_ad(hdev); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ed4ecd930a7..84edacbc14a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -223,9 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); hci_dev_unlock(hdev); - - if (!status && !test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -776,11 +773,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) { + if (!rp->status) hdev->adv_tx_power 
= rp->tx_power; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); - } } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -877,10 +871,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); } - hci_dev_unlock(hdev); + if (!test_bit(HCI_INIT, &hdev->flags)) { + struct hci_request req; - if (!test_bit(HCI_INIT, &hdev->flags)) - hci_update_ad(hdev); + hci_req_init(&req, hdev); + hci_update_ad(&req); + hci_req_run(&req, NULL); + } + + hci_dev_unlock(hdev); } static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 1a4d3c4b3750885733641216756de4e4d9b2443a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:08 -0500 Subject: Bluetooth: Add proper flag for fast connectable mode In order to be able to represent fast connectable mode in the mgmt settings we need to have a HCI dev flag for it. This patch adds the flag and makes sure its value is changed whenever a mgmt_set_fast_connectable command completes. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 4 +++- net/bluetooth/mgmt.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1e40222e9dd..b854506c859 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -120,12 +120,14 @@ enum { HCI_DISCOVERABLE, HCI_LINK_SECURITY, HCI_PERIODIC_INQ, + HCI_FAST_CONNECTABLE, }; /* A mask for the flags that are supposed to remain when a reset happens * or the HCI device is closed. */ -#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ)) +#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ) | \ + BIT(HCI_FAST_CONNECTABLE)) /* HCI ioctl defines */ #define HCIDEVUP _IOW('H', 201, int) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e89938e0233..b6a33c5e768 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -410,6 +410,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_CONNECTABLE; + if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) + settings |= MGMT_SETTING_FAST_CONNECTABLE; + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_DISCOVERABLE; @@ -2913,6 +2916,13 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status) cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, mgmt_status(status)); } else { + struct mgmt_mode *cp = cmd->param; + + if (cp->val) + set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + else + clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); new_settings(hdev, cmd->sk); } @@ -2959,6 +2969,12 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, goto unlock; } + if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { + err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, + hdev); + goto unlock; + } + if (cp->val) { type = PAGE_SCAN_TYPE_INTERLACED; -- cgit v1.2.3-70-g09d2 From f332ec6699980e0563408c7bcf1a8a31b825fee1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 15 Mar 2013 17:07:11 -0500 Subject: Bluetooth: Add reading of page scan parameters These parameters are related to the "fast connectable" mode that can be changed through 
the mgmt interface. Not all controllers properly reset these values with HCI_Reset so they need to be read in order to be able to verify whether the values are correct or not before enabling page scan. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 13 +++++++++++++ include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_core.c | 6 ++++++ net/bluetooth/hci_event.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b854506c859..b3308927a0a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -887,12 +887,25 @@ struct hci_rp_read_data_block_size { __le16 num_blocks; } __packed; +#define HCI_OP_READ_PAGE_SCAN_ACTIVITY 0x0c1b +struct hci_rp_read_page_scan_activity { + __u8 status; + __le16 interval; + __le16 window; +} __packed; + #define HCI_OP_WRITE_PAGE_SCAN_ACTIVITY 0x0c1c struct hci_cp_write_page_scan_activity { __le16 interval; __le16 window; } __packed; +#define HCI_OP_READ_PAGE_SCAN_TYPE 0x0c46 +struct hci_rp_read_page_scan_type { + __u8 status; + __u8 type; +} __packed; + #define HCI_OP_WRITE_PAGE_SCAN_TYPE 0x0c47 #define PAGE_SCAN_TYPE_STANDARD 0x00 #define PAGE_SCAN_TYPE_INTERLACED 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cb99193c749..358a6983d3b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -165,6 +165,10 @@ struct hci_dev { __u16 voice_setting; __u8 io_capability; __s8 inq_tx_power; + __u16 page_scan_interval; + __u16 page_scan_window; + __u8 page_scan_type; + __u16 devid_source; __u16 devid_vendor; __u16 devid_product; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ffd3587117..cfcad5423f1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -272,6 +272,12 @@ static void bredr_setup(struct hci_request *req) bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + + /* Read page scan parameters */ + if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + } } static void le_setup(struct hci_request *req) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 84edacbc14a..3c6d0a4f78d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -601,6 +601,30 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&hdev->bdaddr, &rp->bdaddr); } +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { + hdev->page_scan_interval = __le16_to_cpu(rp->interval); + hdev->page_scan_window = __le16_to_cpu(rp->window); + } +} + +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_page_scan_type *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) + hdev->page_scan_type = rp->type; +} + static void hci_cc_read_data_block_size(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2204,6 +2228,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff 
*skb) hci_cc_read_bd_addr(hdev, skb); break; + case HCI_OP_READ_PAGE_SCAN_ACTIVITY: + hci_cc_read_page_scan_activity(hdev, skb); + break; + + case HCI_OP_READ_PAGE_SCAN_TYPE: + hci_cc_read_page_scan_type(hdev, skb); + break; + case HCI_OP_READ_DATA_BLOCK_SIZE: hci_cc_read_data_block_size(hdev, skb); break; -- cgit v1.2.3-70-g09d2 From ddbfe860acc39d4856a86186eb8a292426ea6224 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 8 Mar 2013 14:46:14 +0100 Subject: mac80211: move sdata debugfs dir to vif Drivers need to be able to create their own per-interface debugfs files. This is currently done by the drv_{add,remove}_interface_debugfs() callbacks. But it is possible that drv_remove_interface_debugfs() is called after the interface has already been removed from the driver (i.e. on suspend). Fixing this properly would require calling drv_{add,remove}_interface_debugfs() whenever an interface is created or removed in mac80211. So it's better to add the debugfs dir dentry to the vif structure, allowing drivers to create and remove their custom debugfs files in drv_{add,remove}_interface(). Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/debugfs_key.c | 10 +++++----- net/mac80211/debugfs_netdev.c | 22 +++++++++++----------- net/mac80211/driver-ops.h | 4 ++-- net/mac80211/ieee80211_i.h | 1 - 5 files changed, 25 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f5db5e97042..8b2c7506f5c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1067,6 +1067,9 @@ enum ieee80211_vif_flags { * path needing to access it; even though the netdev carrier will always * be off when it is %NULL there can still be races and packets could be * processed after it switches back to %NULL. + * @debugfs_dir: debugfs dentry, can be used by drivers to create own per + * interface debug files. Note that it will be NULL for the virtual + * monitor interface (if that is requested.) * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *).
*/ @@ -1083,6 +1086,10 @@ struct ieee80211_vif { u32 driver_flags; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index c3a3082b72e..1521cabad3d 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; lockdep_assert_held(&sdata->local->key_mtx); @@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } if (sdata->debugfs.default_multicast_key) { @@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } } @@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; key = key_mtx_dereference(sdata->local, @@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = debugfs_create_symlink("default_mgmt_key", - sdata->debugfs.dir, buf); + sdata->vif.debugfs_dir, buf); } else ieee80211_debugfs_key_remove_mgmt_default(sdata); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 059bbb82e84..ddb42686790 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -521,7 +521,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, #endif #define DEBUGFS_ADD_MODE(name, mode) \ - debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ sdata, &name##_ops); #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -577,7 +577,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_stats", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHSTATS_ADD(name)\ debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); @@ -594,7 +594,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) static void add_mesh_config(struct ieee80211_sub_if_data *sdata) { struct dentry *dir = debugfs_create_dir("mesh_config", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); #define MESHPARAMS_ADD(name) \ debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); @@ -631,7 +631,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) static void add_files(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; DEBUGFS_ADD(flags); @@ -673,21 +673,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); - sdata->debugfs.dir = debugfs_create_dir(buf, + sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); - 
if (sdata->debugfs.dir) + if (sdata->vif.debugfs_dir) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", - sdata->debugfs.dir); + sdata->vif.debugfs_dir); add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfs.dir) + if (!sdata->vif.debugfs_dir) return; - debugfs_remove_recursive(sdata->debugfs.dir); - sdata->debugfs.dir = NULL; + debugfs_remove_recursive(sdata->vif.debugfs_dir); + sdata->vif.debugfs_dir = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) @@ -695,7 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) struct dentry *dir; char buf[10 + IFNAMSIZ]; - dir = sdata->debugfs.dir; + dir = sdata->vif.debugfs_dir; if (!dir) return; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 025b7592b79..0059f3886d4 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -560,7 +560,7 @@ void drv_add_interface_debugfs(struct ieee80211_local *local, return; local->ops->add_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); + sdata->vif.debugfs_dir); } static inline @@ -575,7 +575,7 @@ void drv_remove_interface_debugfs(struct ieee80211_local *local, return; local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, - sdata->debugfs.dir); + sdata->vif.debugfs_dir); } #else static inline diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 95beb18588f..d6e92060982 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -758,7 +758,6 @@ struct ieee80211_sub_if_data { #ifdef CONFIG_MAC80211_DEBUGFS struct { - struct dentry *dir; struct dentry *subdir_stations; struct dentry *default_unicast_key; struct dentry *default_multicast_key; -- cgit v1.2.3-70-g09d2 From d260ff12e7768444b4da7612b785cfd7cbc1d1c1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 8 Mar 2013 14:46:15 +0100 Subject: mac80211: remove vif debugfs driver callbacks This basically reverts commit b207cdb07f3f01ec1adaac62e9d0cc918c60a81a. Now is possible to use drv_{add,remove}_interface() and vif->debugfs_dir to create/remove per interface debugfs files. Remove redundant callbacks. Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/mac80211.h | 18 ------------------ net/mac80211/driver-ops.h | 37 ------------------------------------- net/mac80211/iface.c | 4 ---- 3 files changed, 59 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8b2c7506f5c..0b912d22f82 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2233,18 +2233,6 @@ enum ieee80211_roc_type { * MAC address of the device going away. * Hence, this callback must be implemented. It can sleep. * - * @add_interface_debugfs: Drivers can use this callback to add debugfs files - * when a vif is added to mac80211. This callback and - * @remove_interface_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. @remove_interface_debugfs must be provided for cleanup. - * This callback can sleep. - * - * @remove_interface_debugfs: Remove the debugfs files which were added using - * @add_interface_debugfs. This callback must remove all debugfs entries - * that were added because mac80211 only removes interface debugfs when the - * interface is destroyed, not when it is removed from the driver. - * This callback can sleep. - * * @config: Handler for configuration requests. 
IEEE 802.11 code calls this * function to change hardware configuration, e.g., channel. * This function should never fail but returns a negative error code @@ -2665,12 +2653,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*add_interface_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct dentry *dir); - void (*remove_interface_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 0059f3886d4..7b9ff53bd2e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -547,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->add_interface_debugfs) - return; - - local->ops->add_interface_debugfs(&local->hw, &sdata->vif, - sdata->vif.debugfs_dir); -} - -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - check_sdata_in_driver(sdata); - - if (!local->ops->remove_interface_debugfs) - return; - - local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, - sdata->vif.debugfs_dir); -} -#else -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) {} #endif static inline __must_check diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9875e321c9e..80e838bc875 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -557,8 +557,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) goto err_del_interface; } - drv_add_interface_debugfs(local, sdata); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -846,8 +844,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_purge(&sdata->skb_queue); - drv_remove_interface_debugfs(local, sdata); - if (going_down) drv_remove_interface(local, sdata); } -- cgit v1.2.3-70-g09d2 From 39ecc01d1bbe3de2cf5f01a81e176ea5160d3b95 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Feb 2013 12:11:00 +0100 Subject: mac80211: pass queue bitmap to flush operation There are a number of situations in which mac80211 only really needs to flush queues for one virtual interface, and in fact during this frames might be transmitted on other virtual interfaces. Calculate and pass a queue bitmap to the driver so it knows which queues to flush. 
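For illustration only (this sketch is not part of the patch): a driver implementing the updated callback could consume the bitmap along the following lines, where the mydrv_* type and helpers are hypothetical placeholders.

static void mydrv_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
{
	struct mydrv_priv *priv = hw->priv;	/* hypothetical driver state */
	int q;

	for (q = 0; q < hw->queues; q++) {
		if (!(queues & BIT(q)))
			continue;	/* this hardware queue was not requested */
		if (drop)
			mydrv_purge_hw_queue(priv, q);		/* hypothetical helper */
		else
			mydrv_wait_hw_queue_empty(priv, q);	/* hypothetical helper */
	}
}

A driver that cannot flush selectively may still ignore the bitmap and flush all queues, which is a safe superset of the request and matches the old behaviour.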
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ar5523/ar5523.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 2 +- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 2 +- drivers/net/wireless/iwlegacy/common.c | 3 +-- drivers/net/wireless/iwlegacy/common.h | 2 +- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/p54/main.c | 2 +- drivers/net/wireless/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/rtlwifi/core.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/net/mac80211.h | 9 +++++--- net/mac80211/driver-ops.h | 7 +++--- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 2 +- net/mac80211/mlme.c | 8 +++---- net/mac80211/offchannel.c | 4 ++-- net/mac80211/pm.c | 2 +- net/mac80211/scan.c | 4 ++-- net/mac80211/trace.h | 11 ++++++---- net/mac80211/util.c | 25 ++++++++++++++++++++++ 23 files changed, 67 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 7157f7d311c..afd1e36d308 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1091,7 +1091,7 @@ static int ar5523_set_rts_threshold(struct ieee80211_hw *hw, u32 value) return ret; } -static void ar5523_flush(struct ieee80211_hw *hw, bool drop) +static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ar5523 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6e66f9c6782..24650fd4169 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1745,7 +1745,7 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) mutex_unlock(&sc->mutex); } -static void ath9k_flush(struct ieee80211_hw *hw, bool drop) +static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f293b3ff475..08b19319994 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1703,7 +1703,7 @@ found: return 0; } -static void carl9170_op_flush(struct ieee80211_hw *hw, bool drop) +static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct ar9170 *ar = hw->priv; unsigned int vid; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index c6451c61407..aa5f43fee5e 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -723,7 +723,7 @@ static bool brcms_tx_flush_completed(struct brcms_info *wl) return result; } -static void brcms_ops_flush(struct ieee80211_hw *hw, bool drop) +static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct brcms_info *wl = hw->priv; int ret; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index e006ea83132..722bfb57cfd 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4704,8 +4704,7 @@ out: } EXPORT_SYMBOL(il_mac_change_interface); -void -il_mac_flush(struct ieee80211_hw *hw, bool drop) +void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { 
struct il_priv *il = hw->priv; unsigned long timeout = jiffies + msecs_to_jiffies(500); diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 73bd3ef316c..728aa1306ab 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -1720,7 +1720,7 @@ void il_mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum nl80211_iftype newtype, bool newp2p); -void il_mac_flush(struct ieee80211_hw *hw, bool drop); +void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); int il_alloc_txq_mem(struct il_priv *il); void il_free_txq_mem(struct il_priv *il); diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index c7cd2dffa5c..a7294fa4d7e 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1100,7 +1100,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } -static void iwlagn_mac_flush(struct ieee80211_hw *hw, bool drop) +static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7490c4fc717..3466f1a8a8d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1389,7 +1389,7 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, return 0; } -static void mac80211_hwsim_flush(struct ieee80211_hw *hw, bool drop) +static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { /* Not implemented, queues only on kernel side */ } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index aadda99989c..ee654a691f3 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -670,7 +670,7 @@ static unsigned int p54_flush_count(struct p54_common *priv) return total; } -static void p54_flush(struct ieee80211_hw *dev, bool drop) +static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop) { struct p54_common *priv = dev->priv; unsigned int total, i; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 086abb403a4..041b392f4f4 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -1360,7 +1360,7 @@ int rt2x00mac_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue, const struct ieee80211_tx_queue_params *params); void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw); -void rt2x00mac_flush(struct ieee80211_hw *hw, bool drop); +void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); void rt2x00mac_get_ringparam(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 20c6eccce5a..9161c02d8ff 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -748,7 +748,7 @@ void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll); -void rt2x00mac_flush(struct ieee80211_hw *hw, bool drop) +void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct 
rt2x00_dev *rt2x00dev = hw->priv; struct data_queue *queue; diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index d3ce9fbef00..b5a7a260bf6 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1166,7 +1166,7 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw) * before switch channle or power save, or tx buffer packet * maybe send after offchannel or rf sleep, this may cause * dis-association by AP */ -static void rtl_op_flush(struct ieee80211_hw *hw, bool drop) +static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index d7e306333f6..a9f7041c719 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4946,7 +4946,7 @@ out: mutex_unlock(&wl->mutex); } -static void wlcore_op_flush(struct ieee80211_hw *hw, bool drop) +static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) { struct wl1271 *wl = hw->priv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b912d22f82..4158da74e11 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2438,8 +2438,11 @@ enum ieee80211_roc_type { * @testmode_dump: Implement a cfg80211 test mode dump. The callback can sleep. * * @flush: Flush all pending frames from the hardware queue, making sure - * that the hardware queues are empty. If the parameter @drop is set - * to %true, pending frames may be dropped. The callback can sleep. + * that the hardware queues are empty. The @queues parameter is a bitmap + * of queues to flush, which is useful if different virtual interfaces + * use different hardware queues; it may also indicate all queues. + * If the parameter @drop is set to %true, pending frames may be dropped. + * The callback can sleep. 
* * @channel_switch: Drivers that need (or want) to offload the channel * switch operation for CSAs received from the AP may implement this @@ -2687,7 +2690,7 @@ struct ieee80211_ops { struct netlink_callback *cb, void *data, int len); #endif - void (*flush)(struct ieee80211_hw *hw, bool drop); + void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); int (*napi_poll)(struct ieee80211_hw *hw, int budget); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 7b9ff53bd2e..169664c122e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -720,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) local->ops->rfkill_poll(&local->hw); } -static inline void drv_flush(struct ieee80211_local *local, bool drop) +static inline void drv_flush(struct ieee80211_local *local, + u32 queues, bool drop) { might_sleep(); - trace_drv_flush(local, drop); + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, drop); + local->ops->flush(&local->hw, queues, drop); trace_drv_return_void(local); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d6e92060982..b96c0e97775 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1540,6 +1540,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, { ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80e838bc875..d646e12e55a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -92,7 +92,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fdc06e381c1..65b38e13eb0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1436,7 +1436,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); } } @@ -1767,7 +1767,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out any pending frame (e.g. 
DELBA) before deauth/disassoc */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* deauthenticate/disassociate now */ if (tx || frame_buf) @@ -1776,7 +1776,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* flush out frame */ if (tx) - drv_flush(local, false); + ieee80211_flush_queues(local, sdata); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); @@ -1948,7 +1948,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - drv_flush(sdata->local, false); + ieee80211_flush_queues(sdata->local, sdata); } static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index db547fceaeb..d32f514074b 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -120,7 +120,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) */ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -373,7 +373,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_roc_notify_destroy(roc); if (started) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index b471a67f224..497f21a0d11 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -35,7 +35,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* flush out all packets */ synchronize_net(); - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); local->quiescing = true; /* make quiescing visible to timers everywhere */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5dc17c623f7..cb34cbbaa20 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -332,7 +332,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); ieee80211_configure_filter(local); @@ -668,7 +668,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - drv_flush(local, false); + ieee80211_flush_queues(local, NULL); *next_delay = 0; } else *next_delay = HZ / 10; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d97e4305cf1..c5899797a8d 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -964,23 +964,26 @@ TRACE_EVENT(drv_get_survey, ); TRACE_EVENT(drv_flush, - TP_PROTO(struct ieee80211_local *local, bool drop), + TP_PROTO(struct ieee80211_local *local, + u32 queues, bool drop), - TP_ARGS(local, drop), + TP_ARGS(local, queues, drop), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, drop) + __field(u32, queues) ), TP_fast_assign( LOCAL_ASSIGN; __entry->drop = drop; + __entry->queues = queues; ), TP_printk( - LOCAL_PR_FMT " drop:%d", - LOCAL_PR_ARG, __entry->drop + LOCAL_PR_FMT " queues:0x%x drop:%d", + LOCAL_PR_ARG, __entry->queues, __entry->drop ) ); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 
b7a856e3281..f978ddd1bb4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -511,6 +511,31 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_wake_queues); +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u32 queues; + + if (!local->ops->flush) + return; + + if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { + int ac; + + queues = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + queues |= BIT(sdata->vif.hw_queue[ac]); + if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE) + queues |= BIT(sdata->vif.cab_queue); + } else { + /* all queues */ + queues = BIT(local->hw.queues) - 1; + } + + drv_flush(local, queues, false); +} + void ieee80211_iterate_active_interfaces( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, -- cgit v1.2.3-70-g09d2 From 445ea4e83ec50668cc9ad7e5cf96d242f19165e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Feb 2013 12:25:28 +0100 Subject: mac80211: stop queues temporarily for flushing Sometimes queues are flushed in the middle of operation, which can lead to driver issues. Stop queues temporarily, while flushing, to avoid transmitting new packets while they are being flushed. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/main.c | 4 ++-- net/mac80211/mlme.c | 4 ++++ net/mac80211/offchannel.c | 4 ++-- net/mac80211/pm.c | 4 +++- net/mac80211/tx.c | 1 + net/mac80211/util.c | 23 ++++++++++++++++------- 8 files changed, 33 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4158da74e11..dd73b8c6746 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -93,9 +93,11 @@ struct device; * enum ieee80211_max_queues - maximum number of queues * * @IEEE80211_MAX_QUEUES: Maximum number of regular device queues. 
+ * @IEEE80211_MAX_QUEUE_MAP: bitmap with maximum queues set */ enum ieee80211_max_queues { IEEE80211_MAX_QUEUES = 16, + IEEE80211_MAX_QUEUE_MAP = BIT(IEEE80211_MAX_QUEUES) - 1, }; #define IEEE80211_INVAL_HW_QUEUE 0xff diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b96c0e97775..ae2d1754b79 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -809,6 +809,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + IEEE80211_QUEUE_STOP_REASON_FLUSH, }; #ifdef CONFIG_MAC80211_LEDS @@ -1522,8 +1523,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index eee1768e89c..c6f81ecc36a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -277,8 +277,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) "Hardware restart was requested\n"); /* use this reason, ieee80211_reconfig will unblock it */ - ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * Stop all Rx during the reconfig. We don't want state changes diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 65b38e13eb0..4d383a93ea7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1009,6 +1009,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; @@ -1108,6 +1109,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (sw_elem->mode) ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); if (sdata->local->ops->channel_switch) { @@ -1375,6 +1377,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) } ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); } @@ -2071,6 +2074,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index d32f514074b..b01eb7314ec 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -118,7 +118,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * Stop queues and transmit all frames queued by the driver * before sending nullfunc to enable powersave at the AP. 
*/ - ieee80211_stop_queues_by_reason(&local->hw, + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); ieee80211_flush_queues(local, NULL); @@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); - ieee80211_wake_queues_by_reason(&local->hw, + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 497f21a0d11..3d16f4e6174 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -30,7 +30,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } ieee80211_stop_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* flush out all packets */ synchronize_net(); @@ -68,6 +69,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); return err; } else if (err > 0) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3fcdf211810..2a6ae8030bd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_PS); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f978ddd1bb4..a7368870c8e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, - enum queue_stop_reason reason) + unsigned long queues, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_stop_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_stop_queues(struct ieee80211_hw *hw) { - ieee80211_stop_queues_by_reason(hw, + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -491,6 +492,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) EXPORT_SYMBOL(ieee80211_queue_stopped); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + unsigned long queues, enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); @@ -499,7 +501,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues; i++) + for_each_set_bit(i, &queues, hw->queues) __ieee80211_wake_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -507,7 +509,8 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_wake_queues(struct ieee80211_hw *hw) { - ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); + 
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_wake_queues); @@ -533,7 +536,13 @@ void ieee80211_flush_queues(struct ieee80211_local *local, queues = BIT(local->hw.queues) - 1; } + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); + drv_flush(local, queues, false); + + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_FLUSH); } void ieee80211_iterate_active_interfaces( @@ -1676,8 +1685,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } - ieee80211_wake_queues_by_reason(hw, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* * If this is for hw restart things are still running. -- cgit v1.2.3-70-g09d2 From b962abdc6531c8de837504ebc98139587162f223 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:08 +0200 Subject: ipvs: fix some sparse warnings Add missing __percpu annotations and make ip_vs_net_id static. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_core.c | 8 +------- net/netfilter/ipvs/ip_vs_est.c | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d39..29bc0557756 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -459,7 +459,7 @@ struct ip_vs_estimator { struct ip_vs_stats { struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_estimator est; /* estimator */ - struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ + struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */ spinlock_t lock; /* spin lock */ struct ip_vs_stats_user ustats0; /* reset values */ }; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a5..3e5e80b809f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - struct net *net = - dev_net(skb_dst(skb)->dev); - if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac6017b6f..6bee6d0c73a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@ * Make a summary from each cpu */ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, - struct ip_vs_cpu_stats *stats) + struct ip_vs_cpu_stats __percpu *stats) { int i; -- cgit v1.2.3-70-g09d2 From dece40e848f6e022f960dc9de54be518928460c3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 13 Mar 2013 23:40:14 +0000 Subject: netfilter: nf_conntrack: speed up module removal path if netns in use The patch introduces nf_conntrack_cleanup_net_list(), which cleanups nf_conntrack for a list of netns and calls synchronize_net() only once for them all. This should reduce netns destruction time. 
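In outline this is the standard .exit_batch pernet pattern, which pays the expensive synchronization once per batch instead of once per netns. The sketch below is illustrative only; the example_* names are hypothetical and not taken from this patch.

static void example_pernet_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	synchronize_net();	/* paid once for the whole batch of netns */

	list_for_each_entry(net, net_exit_list, exit_list)
		example_cleanup_one_net(net);	/* hypothetical per-netns teardown */
}

static struct pernet_operations example_net_ops = {
	.exit_batch = example_pernet_exit_batch,
};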
I've measured cleanup time for 1k dummy net ns. Here are the results: # modprobe nf_conntrack # time modprobe -r nf_conntrack real 0m10.337s user 0m0.000s sys 0m0.376s # modprobe nf_conntrack # time modprobe -r nf_conntrack real 0m5.661s user 0m0.000s sys 0m0.216s Signed-off-by: Vladimir Davydov Cc: Patrick McHardy Cc: "David S. Miller" Cc: "Eric W. Biederman" Acked-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 1 + net/netfilter/nf_conntrack_core.c | 46 +++++++++++++++++++++---------- net/netfilter/nf_conntrack_standalone.c | 16 +++++++---- 3 files changed, 43 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 930275fa2ea..fb2b6234e93 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net, extern int nf_conntrack_init_net(struct net *net); extern void nf_conntrack_cleanup_net(struct net *net); +extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); extern int nf_conntrack_proto_pernet_init(struct net *net); extern void nf_conntrack_proto_pernet_fini(struct net *net); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1068deb97c8..007e8c43d19 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1365,30 +1365,48 @@ void nf_conntrack_cleanup_end(void) */ void nf_conntrack_cleanup_net(struct net *net) { + LIST_HEAD(single); + + list_add(&net->exit_list, &single); + nf_conntrack_cleanup_net_list(&single); +} + +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) +{ + int busy; + struct net *net; + /* * This makes sure all current packets have passed through * netfilter framework. Roll on, two-stage module * delete... 
*/ synchronize_net(); - i_see_dead_people: - nf_ct_iterate_cleanup(net, kill_all, NULL); - nf_ct_release_dying_list(net); - if (atomic_read(&net->ct.count) != 0) { +i_see_dead_people: + busy = 0; + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(net); + if (atomic_read(&net->ct.count) != 0) + busy = 1; + } + if (busy) { schedule(); goto i_see_dead_people; } - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); - free_percpu(net->ct.stat); + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); + free_percpu(net->ct.stat); + } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce401fd1..6c69fbdb836 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -545,16 +545,20 @@ out_init: return ret; } -static void nf_conntrack_pernet_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup_net(net); + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + } + nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_pernet_init, - .exit = nf_conntrack_pernet_exit, + .init = nf_conntrack_pernet_init, + .exit_batch = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) -- cgit v1.2.3-70-g09d2 From 77f65ebdca506870d99bfabe52bde222511022ec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 19 Mar 2013 10:18:11 +0000 Subject: packet: packet fanout rollover during socket overload Changes: v3->v2: rebase (no other changes) passes selftest v2->v1: read f->num_members only once fix bug: test rollover mode + flag Minimize packet drop in a fanout group. If one socket is full, roll over packets to another from the group. Maintain flow affinity during normal load using an rxhash fanout policy, while dispersing unexpected traffic storms that hit a single cpu, such as spoofed-source DoS flows. Rollover breaks affinity for flows arriving at saturated sockets during those conditions. The patch adds a fanout policy ROLLOVER that rotates between sockets, filling each socket before moving to the next. It also adds a fanout flag ROLLOVER. If passed along with any other fanout policy, the primary policy is applied until the chosen socket is full. Then, rollover selects another socket, to delay packet drop until the entire system is saturated. Probing sockets is not free. 
Selecting the last used socket, as rollover does, is a greedy approach that maximizes chance of success, at the cost of extreme load imbalance. In practice, with sufficiently long queues to absorb bursts, sockets are drained in parallel and load balance looks uniform in `top`. To avoid contention, scales counters with number of sockets and accesses them lockfree. Values are bounds checked to ensure correctness. Tested using an application with 9 threads pinned to CPUs, one socket per thread and sufficient busywork per packet operation to limits each thread to handling 32 Kpps. When sent 500 Kpps single UDP stream packets, a FANOUT_CPU setup processes 32 Kpps in total without this patch, 270 Kpps with the patch. Tested with read() and with a packet ring (V1). Also, passes psock_fanout.c unit test added to selftests. Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 2 + net/packet/af_packet.c | 109 +++++-- net/packet/internal.h | 3 +- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net-afpacket/Makefile | 18 ++ .../testing/selftests/net-afpacket/psock_fanout.c | 326 +++++++++++++++++++++ .../selftests/net-afpacket/run_afpackettests | 16 + 7 files changed, 451 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/net-afpacket/Makefile create mode 100644 tools/testing/selftests/net-afpacket/psock_fanout.c create mode 100644 tools/testing/selftests/net-afpacket/run_afpackettests (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index f9a60375f0d..8136658ea47 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -55,6 +55,8 @@ struct sockaddr_ll { #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 #define PACKET_FANOUT_CPU 2 +#define PACKET_FANOUT_ROLLOVER 3 +#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1d6793dbfba..bd0d14c97d4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -181,6 +181,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, @@ -973,11 +975,11 @@ static void *packet_current_rx_frame(struct packet_sock *po, static void *prb_lookup_block(struct packet_sock *po, struct packet_ring_buffer *rb, - unsigned int previous, + unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; @@ -1041,6 +1043,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? 
buff->head+1 : 0; } +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + bool has_room; + + if (po->prot_hook.func != tpacket_rcv) + return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) + <= sk->sk_rcvbuf; + + spin_lock(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) + has_room = prb_lookup_block(po, &po->rx_ring, + po->rx_ring.prb_bdqc.kactive_blk_num, + TP_STATUS_KERNEL); + else + has_room = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, + TP_STATUS_KERNEL); + spin_unlock(&sk->sk_receive_queue.lock); + + return has_room; +} + static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -1066,16 +1091,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - u32 idx, hash = skb->rxhash; - - idx = ((u64)hash * num) >> 32; - - return f->arr[idx]; + return (((u64)skb->rxhash) * num) >> 32; } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { int cur, old; @@ -1083,14 +1108,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb while ((old = atomic_cmpxchg(&f->rr_cur, cur, fanout_rr_next(f, num))) != cur) cur = old; - return f->arr[cur]; + return cur; +} + +static unsigned int fanout_demux_cpu(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return smp_processor_id() % num; } -static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_rollover(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int idx, unsigned int skip, + unsigned int num) { - unsigned int cpu = smp_processor_id(); + unsigned int i, j; - return f->arr[cpu % num]; + i = j = min_t(int, f->next[idx], num - 1); + do { + if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != j) + f->next[idx] = i; + return i; + } + if (++i == num) + i = 0; + } while (i != j); + + return idx; +} + +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ + return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1099,7 +1150,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; struct packet_sock *po; - struct sock *sk; + unsigned int idx; if (!net_eq(dev_net(dev), read_pnet(&f->net)) || !num) { @@ -1110,23 +1161,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, switch (f->type) { case PACKET_FANOUT_HASH: default: - if (f->defrag) { + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } skb_get_rxhash(skb); - sk = fanout_demux_hash(f, skb, num); + idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: - sk = fanout_demux_lb(f, skb, num); + idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: - sk = fanout_demux_cpu(f, skb, num); + idx = fanout_demux_cpu(f, skb, num); + break; + case PACKET_FANOUT_ROLLOVER: + idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; } - po = 
pkt_sk(sk); + po = pkt_sk(f->arr[idx]); + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && + unlikely(!packet_rcv_has_room(po, skb))) { + idx = fanout_demux_rollover(f, skb, idx, idx, num); + po = pkt_sk(f->arr[idx]); + } return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1175,10 +1234,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; - u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; + u8 flags = type_flags >> 8; int err; switch (type) { + case PACKET_FANOUT_ROLLOVER: + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) + return -EINVAL; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: @@ -1203,7 +1265,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->defrag != defrag) + if (match && match->flags != flags) goto out; if (!match) { err = -ENOMEM; @@ -1213,7 +1275,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; - match->defrag = defrag; + match->flags = flags; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); @@ -3240,7 +3302,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | - ((u32)po->fanout->type << 16)) : + ((u32)po->fanout->type << 16) | + ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_TX_HAS_OFF: diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8cb7a..e891f025a1b 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -77,10 +77,11 @@ struct packet_fanout { unsigned int num_members; u16 id; u8 type; - u8 defrag; + u8 flags; atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; + int next[PACKET_FANOUT_MAX]; spinlock_t lock; atomic_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 7c6280f1cf4..7f50078d0e8 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,6 +6,7 @@ TARGETS += cpu-hotplug TARGETS += memory-hotplug TARGETS += efivarfs TARGETS += net-socket +TARGETS += net-afpacket all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/net-afpacket/Makefile b/tools/testing/selftests/net-afpacket/Makefile new file mode 100644 index 00000000000..45f2ffb7fda --- /dev/null +++ b/tools/testing/selftests/net-afpacket/Makefile @@ -0,0 +1,18 @@ +# Makefile for net-socket selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall + +CFLAGS += -I../../../../usr/include/ + +AF_PACKET_PROGS = psock_fanout + +all: $(AF_PACKET_PROGS) +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" + +clean: + $(RM) $(AF_PACKET_PROGS) diff --git a/tools/testing/selftests/net-afpacket/psock_fanout.c b/tools/testing/selftests/net-afpacket/psock_fanout.c new file mode 100644 index 00000000000..09dbf93c53d --- /dev/null +++ b/tools/testing/selftests/net-afpacket/psock_fanout.c @@ -0,0 +1,326 @@ +/* + * Copyright 2013 Google Inc. + * Author: Willem de Bruijn (willemb@google.com) + * + * A basic test of packet socket fanout behavior. 
+ * + * Control: + * - create fanout fails as expected with illegal flag combinations + * - join fanout fails as expected with diverging types or flags + * + * Datapath: + * Open a pair of packet sockets and a pair of INET sockets, send a known + * number of packets across the two INET sockets and count the number of + * packets enqueued onto the two packet sockets. + * + * The test currently runs for + * - PACKET_FANOUT_HASH + * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER + * - PACKET_FANOUT_ROLLOVER + * + * Todo: + * - datapath: PACKET_FANOUT_LB + * - datapath: PACKET_FANOUT_CPU + * - functionality: PACKET_FANOUT_FLAG_DEFRAG + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Hack: build even if local includes are old */ +#ifndef PACKET_FANOUT +#define PACKET_FANOUT 18 +#define PACKET_FANOUT_HASH 0 +#define PACKET_FANOUT_LB 1 +#define PACKET_FANOUT_CPU 2 +#define PACKET_FANOUT_FLAG_DEFRAG 0x8000 + +#ifndef PACKET_FANOUT_ROLLOVER +#define PACKET_FANOUT_ROLLOVER 3 +#endif + +#ifndef PACKET_FANOUT_FLAG_ROLLOVER +#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 +#endif + +#endif + +#define DATA_LEN 100 +#define DATA_CHAR 'a' + +static void pair_udp_open(int fds[], uint16_t port) +{ + struct sockaddr_in saddr, daddr; + + fds[0] = socket(PF_INET, SOCK_DGRAM, 0); + fds[1] = socket(PF_INET, SOCK_DGRAM, 0); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: socket dgram\n"); + exit(1); + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons(port); + saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sin_family = AF_INET; + daddr.sin_port = htons(port + 1); + daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + /* must bind both to get consistent hash result */ + if (bind(fds[1], (void *) &daddr, sizeof(daddr))) { + perror("bind"); + exit(1); + } + if (bind(fds[0], (void *) &saddr, sizeof(saddr))) { + perror("bind"); + exit(1); + } + if (connect(fds[0], (void *) &daddr, sizeof(daddr))) { + perror("bind"); + exit(1); + } +} + +static void pair_udp_send(int fds[], int num) +{ + char buf[DATA_LEN], rbuf[DATA_LEN]; + + memset(buf, DATA_CHAR, sizeof(buf)); + while (num--) { + /* Should really handle EINTR and EAGAIN */ + if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { + fprintf(stderr, "ERROR: send failed left=%d\n", num); + exit(1); + } + if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) { + fprintf(stderr, "ERROR: recv failed left=%d\n", num); + exit(1); + } + if (memcmp(buf, rbuf, sizeof(buf))) { + fprintf(stderr, "ERROR: data failed left=%d\n", num); + exit(1); + } + } +} + +static void sock_fanout_setfilter(int fd) +{ + struct sock_filter bpf_filter[] = { + { 0x80, 0, 0, 0x00000000 
}, /* LD pktlen */ + { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ + { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ + { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x6, 0, 0, 0x00000060 }, /* RET match */ +/* nomatch */ { 0x6, 0, 0, 0x00000000 }, /* RET no match */ + }; + struct sock_fprog bpf_prog; + + bpf_prog.filter = bpf_filter; + bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + sizeof(bpf_prog))) { + perror("setsockopt SO_ATTACH_FILTER"); + exit(1); + } +} + +/* Open a socket in a given fanout mode. + * @return -1 if mode is bad, a valid socket otherwise */ +static int sock_fanout_open(uint16_t typeflags, int num_packets) +{ + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + if (fd < 0) { + perror("socket packet"); + exit(1); + } + + /* fanout group ID is always 0: tests whether old groups are deleted */ + val = ((int) typeflags) << 16; + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (close(fd)) { + perror("close packet"); + exit(1); + } + return -1; + } + + val = sizeof(struct iphdr) + sizeof(struct udphdr) + DATA_LEN; + val *= num_packets; + /* hack: apparently, the above calculation is too small (TODO: fix) */ + val *= 3; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) { + perror("setsockopt SO_RCVBUF"); + exit(1); + } + + sock_fanout_setfilter(fd); + return fd; +} + +static void sock_fanout_read(int fds[], const int expect[]) +{ + struct tpacket_stats stats; + socklen_t ssize; + int ret[2]; + + ssize = sizeof(stats); + if (getsockopt(fds[0], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) { + perror("getsockopt statistics 0"); + exit(1); + } + ret[0] = stats.tp_packets - stats.tp_drops; + ssize = sizeof(stats); + if (getsockopt(fds[1], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) { + perror("getsockopt statistics 1"); + exit(1); + } + ret[1] = stats.tp_packets - stats.tp_drops; + + fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n", + ret[0], ret[1], expect[0], expect[1]); + + if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && + (!(ret[0] == expect[1] && ret[1] == expect[0]))) { + fprintf(stderr, "ERROR: incorrect queue lengths\n"); + exit(1); + } +} + +/* Test illegal mode + flag combination */ +static void test_control_single(void) +{ + fprintf(stderr, "test: control single socket\n"); + + if (sock_fanout_open(PACKET_FANOUT_ROLLOVER | + PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { + fprintf(stderr, "ERROR: opened socket with dual rollover\n"); + exit(1); + } +} + +/* Test illegal group with different modes or flags */ +static void test_control_group(void) +{ + int fds[2]; + + fprintf(stderr, "test: control multiple sockets\n"); + + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong mode\n"); + exit(1); + } + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if 
(fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + if (close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + +static void test_datapath(uint16_t typeflags, + const int expect1[], const int expect2[]) +{ + const int expect0[] = { 0, 0 }; + int fds[2], fds_udp[2][2]; + + fprintf(stderr, "test: datapath 0x%hx\n", typeflags); + + fds[0] = sock_fanout_open(typeflags, 20); + fds[1] = sock_fanout_open(typeflags, 20); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + pair_udp_open(fds_udp[0], 8000); + pair_udp_open(fds_udp[1], 8002); + sock_fanout_read(fds, expect0); + + /* Send data, but not enough to overflow a queue */ + pair_udp_send(fds_udp[0], 15); + pair_udp_send(fds_udp[1], 5); + sock_fanout_read(fds, expect1); + + /* Send more data, overflow the queue */ + pair_udp_send(fds_udp[0], 15); + /* TODO: ensure consistent order between expect1 and expect2 */ + sock_fanout_read(fds, expect2); + + if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || + close(fds_udp[0][1]) || close(fds_udp[0][0]) || + close(fds[1]) || close(fds[0])) { + fprintf(stderr, "close datapath\n"); + exit(1); + } +} + +int main(int argc, char **argv) +{ + const int expect_hash[2][2] = { { 15, 5 }, { 5, 0 } }; + const int expect_hash_rb[2][2] = { { 15, 5 }, { 5, 10 } }; + const int expect_rb[2][2] = { { 20, 0 }, { 0, 15 } }; + + test_control_single(); + test_control_group(); + + test_datapath(PACKET_FANOUT_HASH, expect_hash[0], expect_hash[1]); + test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, + expect_hash_rb[0], expect_hash_rb[1]); + test_datapath(PACKET_FANOUT_ROLLOVER, expect_rb[0], expect_rb[1]); + + printf("OK. All tests passed\n"); + return 0; +} diff --git a/tools/testing/selftests/net-afpacket/run_afpackettests b/tools/testing/selftests/net-afpacket/run_afpackettests new file mode 100644 index 00000000000..7907824c635 --- /dev/null +++ b/tools/testing/selftests/net-afpacket/run_afpackettests @@ -0,0 +1,16 @@ +#!/bin/sh + +if [ $(id -u) != 0 ]; then + echo $msg must be run as root >&2 + exit 0 +fi + +echo "--------------------" +echo "running psock_fanout test" +echo "--------------------" +./psock_fanout +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi -- cgit v1.2.3-70-g09d2 From f77668dc25b27270fe589031b22c432c3462b1d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:30 +0000 Subject: net: flow_dissector: add __skb_get_poff to get a start offset to payload __skb_get_poff() returns the offset to the payload as far as it could be dissected. The main user is currently BPF, so that we can dynamically truncate packets without needing to push actual payload to the user space and instead can analyze headers only. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ net/core/flow_dissector.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b66ecc6ef10..497412165b1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2840,6 +2840,8 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +u32 __skb_get_poff(const struct sk_buff *skb); + /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f4be293bab9..00ee068efc1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -228,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + u32 poff = 0; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + poff += keys.thoff; + switch (keys.ip_proto) { + case IPPROTO_TCP: { + const struct tcphdr *tcph; + struct tcphdr _tcph; + + tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + if (!tcph) + return poff; + + poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + poff += sizeof(struct udphdr); + break; + /* For the rest, we do not really care about header + * extensions at this point for now. + */ + case IPPROTO_ICMP: + poff += sizeof(struct icmphdr); + break; + case IPPROTO_ICMPV6: + poff += sizeof(struct icmp6hdr); + break; + case IPPROTO_IGMP: + poff += sizeof(struct igmphdr); + break; + case IPPROTO_DCCP: + poff += sizeof(struct dccp_hdr); + break; + case IPPROTO_SCTP: + poff += sizeof(struct sctphdr); + break; + } + + return poff; +} + static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { -- cgit v1.2.3-70-g09d2 From 3e5289d5e3f98b7b5b8cac32e9e5a7004c067436 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:31 +0000 Subject: filter: add ANC_PAY_OFFSET instruction for loading payload start offset It is very useful to do dynamic truncation of packets. In particular, we're interested to push the necessary header bytes to the user space and cut off user payload that should probably not be transferred for some reasons (e.g. privacy, speed, or others). With the ancillary extension PAY_OFFSET, we can load it into the accumulator, and return it. E.g. in bpfc syntax ... ld #poff ; { 0x20, 0, 0, 0xfffff034 }, ret a ; { 0x16, 0, 0, 0x00000000 }, ... as a filter will accomplish this without having to do a big hackery in a BPF filter itself. Follow-up JIT implementations are welcome. Thanks to Eric Dumazet for suggesting and discussing this during the Netfilter Workshop in Copenhagen. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/filter.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index c45eabc135e..d2059cb4e46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -126,6 +126,7 @@ enum { BPF_S_ANC_SECCOMP_LD_W, BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, + BPF_S_ANC_PAY_OFFSET, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 9cfde694109..8eb9ccaa5b4 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -129,7 +129,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_ALU_XOR_X 40 #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 -#define SKF_AD_MAX 52 +#define SKF_AD_PAY_OFFSET 52 +#define SKF_AD_MAX 56 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 2e20b55a783..dad2a178f9f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b: case BPF_S_ANC_VLAN_TAG_PRESENT: A = !!vlan_tx_tag_present(skb); continue; + case BPF_S_ANC_PAY_OFFSET: + A = __skb_get_poff(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(ALU_XOR_X); ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); + ANCILLARY(PAY_OFFSET); } /* ancillary operation unknown or unsupported */ @@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, -- cgit v1.2.3-70-g09d2 From 2b5faa4c553f90ee2dde1d976b220b1ca9741ef0 Mon Sep 17 00:00:00 2001 From: Jesper Derehag Date: Tue, 19 Mar 2013 20:50:05 +0000 Subject: connector: Added coredumping event to the process connector Process connector can now also detect coredumping events. Main aim of patch is get notified at start of coredumping, instead of having to wait for it to finish and then being notified through EXIT event. Could be used for instance by process-managers that want to get notified as soon as possible about process failures, and not necessarily beeing notified after coredump, which could be in the order of minutes depending on size of coredump, piping and so on. Signed-off-by: Jesper Derehag Signed-off-by: David S. 
Miller --- drivers/connector/cn_proc.c | 25 +++++++++++++++++++++++++ include/linux/cn_proc.h | 4 ++++ include/uapi/linux/cn_proc.h | 10 +++++++++- kernel/signal.c | 2 ++ 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 1110478dd0f..08ae128cce9 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -232,6 +232,31 @@ void proc_comm_connector(struct task_struct *task) cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } +void proc_coredump_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->what = PROC_EVENT_COREDUMP; + ev->event_data.coredump.process_pid = task->pid; + ev->event_data.coredump.process_tgid = task->tgid; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h index 2c1bc1ea04e..1d5b02a96c4 100644 --- a/include/linux/cn_proc.h +++ b/include/linux/cn_proc.h @@ -26,6 +26,7 @@ void proc_id_connector(struct task_struct *task, int which_id); void proc_sid_connector(struct task_struct *task); void proc_ptrace_connector(struct task_struct *task, int which_id); void proc_comm_connector(struct task_struct *task); +void proc_coredump_connector(struct task_struct *task); void proc_exit_connector(struct task_struct *task); #else static inline void proc_fork_connector(struct task_struct *task) @@ -48,6 +49,9 @@ static inline void proc_ptrace_connector(struct task_struct *task, int ptrace_id) {} +static inline void proc_coredump_connector(struct task_struct *task) +{} + static inline void proc_exit_connector(struct task_struct *task) {} #endif /* CONFIG_PROC_EVENTS */ diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h index 0d7b49973bb..f6c271035bb 100644 --- a/include/uapi/linux/cn_proc.h +++ b/include/uapi/linux/cn_proc.h @@ -56,7 +56,9 @@ struct proc_event { PROC_EVENT_PTRACE = 0x00000100, PROC_EVENT_COMM = 0x00000200, /* "next" should be 0x00000400 */ - /* "last" is the last process event: exit */ + /* "last" is the last process event: exit, + * while "next to last" is coredumping event */ + PROC_EVENT_COREDUMP = 0x40000000, PROC_EVENT_EXIT = 0x80000000 } what; __u32 cpu; @@ -110,11 +112,17 @@ struct proc_event { char comm[16]; } comm; + struct coredump_proc_event { + __kernel_pid_t process_pid; + __kernel_pid_t process_tgid; + } coredump; + struct exit_proc_event { __kernel_pid_t process_pid; __kernel_pid_t process_tgid; __u32 exit_code, exit_signal; } exit; + } event_data; }; diff --git a/kernel/signal.c b/kernel/signal.c index dd72567767d..497330ec2ae 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -32,6 +32,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2350,6 +2351,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) print_fatal_signal(info->si_signo); + proc_coredump_connector(current); /* * If it was able to dump core, this kills all * other threads in the 
group and synchronizes with -- cgit v1.2.3-70-g09d2 From 9b44190dc114c1720b34975b5bfc65aece112ced Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 20 Mar 2013 13:32:58 +0000 Subject: tcp: refactor F-RTO The patch series refactor the F-RTO feature (RFC4138/5682). This is to simplify the loss recovery processing. Existing F-RTO was developed during the experimental stage (RFC4138) and has many experimental features. It takes a separate code path from the traditional timeout processing by overloading CA_Disorder instead of using CA_Loss state. This complicates CA_Disorder state handling because it's also used for handling dubious ACKs and undos. While the algorithm in the RFC does not change the congestion control, the implementation intercepts congestion control in various places (e.g., frto_cwnd in tcp_ack()). The new code implements newer F-RTO RFC5682 using CA_Loss processing path. F-RTO becomes a small extension in the timeout processing and interfaces with congestion control and Eifel undo modules. It lets congestion control (module) determines how many to send independently. F-RTO only chooses what to send in order to detect spurious retranmission. If timeout is found spurious it invokes existing Eifel undo algorithms like DSACK or TCP timestamp based detection. The first patch removes all F-RTO code except the sysctl_tcp_frto is left for the new implementation. Since CA_EVENT_FRTO is removed, TCP westwood now computes ssthresh on regular timeout CA_EVENT_LOSS event. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 17 -- include/linux/tcp.h | 6 +- include/net/tcp.h | 4 - net/ipv4/sysctl_net_ipv4.c | 7 - net/ipv4/tcp_input.c | 375 +-------------------------------- net/ipv4/tcp_minisocks.c | 3 - net/ipv4/tcp_output.c | 11 +- net/ipv4/tcp_timer.c | 6 +- net/ipv4/tcp_westwood.c | 2 +- 9 files changed, 10 insertions(+), 421 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17953e2bc3e..8a977a0aaed 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -239,23 +239,6 @@ tcp_frto - INTEGER interacts badly with the packet counting of the SACK enabled TCP flow. -tcp_frto_response - INTEGER - When F-RTO has detected that a TCP retransmission timeout was - spurious (i.e, the timeout would have been avoided had TCP set a - longer retransmission timeout), TCP has several options what to do - next. Possible values are: - 0 Rate halving based; a smooth and conservative response, - results in halved cwnd and ssthresh after one RTT - 1 Very conservative response; not recommended because even - though being valid, it interacts poorly with the rest of - Linux TCP, halves cwnd and ssthresh immediately - 2 Aggressive response; undoes congestion control measures - that are now known to be unnecessary (ignoring the - possibility of a lost retransmission that would require - TCP to be more cautious), cwnd and ssthresh are restored - to the values prior timeout - Default: 0 (rate halving based) - tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. 
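For illustration only (an aside, not part of the diffs in this series): after this refactor the only remaining F-RTO tunable is tcp_frto, while tcp_frto_response is removed. Assuming the conventional procfs location of the IPv4 sysctls, the surviving knob can be read from userspace with a sketch like the following.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_frto", "r"); /* assumed standard sysctl path */
	char buf[16];

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("tcp_frto = %s", buf); /* 0 disables F-RTO; the compiled-in default is 2 */
	fclose(f);
	return 0;
}

Writing to the same file (or using sysctl -w net.ipv4.tcp_frto=...) toggles the behaviour; with the RFC5682 reimplementation later in this series, any non-zero value simply enables F-RTO and 0 disables it.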
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ed6a7456eec..f5f203b3637 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -187,14 +187,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ - u32 frto_highmark; /* snd_nxt when RTO occurred */ u16 advmss; /* Advertised MSS */ - u8 frto_counter; /* Number of new acks after RTO */ + u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ - repair : 1, - unused : 1; + repair : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7f2f17198d7..d1dcb596230 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -272,7 +272,6 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; -extern int sysctl_tcp_frto_response; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; @@ -424,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); -extern bool tcp_use_frto(struct sock *sk); -extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); @@ -756,7 +753,6 @@ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ - CA_EVENT_FRTO, /* fast recovery timeout */ CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_FAST_ACK, /* in sequence ack */ CA_EVENT_SLOW_ACK, /* other ack */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cb45062c8be..fa2f63fc453 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -591,13 +591,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 19f0149fb6a..231c79fe91f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,7 +93,6 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; @@ -108,17 +107,14 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA_SACKED 0x20 /* New SACK. 
*/ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1159,10 +1155,6 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; } if (sacked & TCPCB_LOST) { @@ -1555,7 +1547,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1728,12 +1719,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1750,8 +1735,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1825,197 +1809,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. 
Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). - * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. 
- */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. - */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2090,8 +1883,6 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2275,10 +2066,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. */ if (tp->lost_out) return true; @@ -2760,7 +2547,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -3198,8 +2985,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3408,150 +3193,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. 
- */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} - -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); -} - -static void tcp_undo_spur_to_response(struct sock *sk, int flag) -{ - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); -} - -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. - * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate - */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. 
*/ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); - } - return false; -} - /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { @@ -3616,7 +3257,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3690,22 +3330,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. 
*/ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8f0234f8bb9..05eaf890461 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -422,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->frto_counter = 0; - newtp->frto_highmark = 0; - if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) newicsk->icsk_ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e787ecec505..163cf5fc011 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -78,10 +78,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) @@ -1470,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index eeccf795e91..4b85e6f636c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -416,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e27..76a1e23259e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; -- cgit v1.2.3-70-g09d2 From e33099f96d99c391b3325caa9c44258de04aae86 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 20 Mar 2013 13:33:00 +0000 Subject: tcp: implement RFC5682 F-RTO This patch implements F-RTO (foward RTO recovery): When the first retransmission after timeout is acknowledged, F-RTO sends new data instead of old data. 
If the next ACK acknowledges some never-retransmitted data, then the timeout was spurious and the congestion state is reverted. Otherwise if the next ACK selectively acknowledges the new data, then the timeout was genuine and the loss recovery continues. This idea applies to recurring timeouts as well. While F-RTO sends different data during timeout recovery, it does not (and should not) change the congestion control. The implementaion follows the three steps of SACK enhanced algorithm (section 3) in RFC5682. Step 1 is in tcp_enter_loss(). Step 2 and 3 are in tcp_process_loss(). The basic version is not supported because SACK enhanced version also works for non-SACK connections. The new implementation is functionally in parity with the old F-RTO implementation except the one case where it increases undo events: In addition to the RFC algorithm, a spurious timeout may be detected without sending data in step 2, as long as the SACK confirms not all the original data are dropped. When this happens, the sender will undo the cwnd and perhaps enter fast recovery instead. This additional check increases the F-RTO undo events by 5x compared to the prior implementation on Google Web servers, since the sender often does not have new data to send for HTTP. Note F-RTO may detect spurious timeout before Eifel with timestamps does so. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 18 +++------ include/linux/tcp.h | 3 +- net/ipv4/tcp_input.c | 73 ++++++++++++++++++++++++++++------ 3 files changed, 68 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8a977a0aaed..f98ca633b52 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -225,19 +225,13 @@ tcp_fin_timeout - INTEGER Default: 60 seconds tcp_frto - INTEGER - Enables Forward RTO-Recovery (F-RTO) defined in RFC4138. + Enables Forward RTO-Recovery (F-RTO) defined in RFC5682. F-RTO is an enhanced recovery algorithm for TCP retransmission - timeouts. It is particularly beneficial in wireless environments - where packet loss is typically due to random radio interference - rather than intermediate router congestion. F-RTO is sender-side - only modification. Therefore it does not require any support from - the peer. - - If set to 1, basic version is enabled. 2 enables SACK enhanced - F-RTO if flow uses SACK. The basic version can be used also when - SACK is in use though scenario(s) with it exists where F-RTO - interacts badly with the packet counting of the SACK enabled TCP - flow. + timeouts. It is particularly beneficial in networks where the + RTT fluctuates (e.g., wireless). F-RTO is sender-side only + modification. It does not require any support from the peer. + + By default it's enabled with a non-zero value. 0 disables F-RTO. tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f5f203b3637..5adbc33d1ab 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -192,7 +192,8 @@ struct tcp_sock { u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ - repair : 1; + repair : 1, + frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8d821e45b91..b2b36196b34 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ @@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); + if (!after(end_seq, tp->high_seq)) + state->flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_LOST) { @@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + bool new_recovery = false; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || + !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); @@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + */ + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) return failed; } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_may_undo(tp)) { + if (frto_undo || tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk) tp->lost_out = 0; tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + if (frto_undo) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; } @@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) /* Process an ACK in CA_Loss state. 
Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ -static void tcp_process_loss(struct sock *sk, int flag) +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); - if (!before(tp->snd_una, tp->high_seq)) { + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + if (flag & FLAG_ORIG_SACK_ACKED) { + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + tcp_try_undo_loss(sk, true); + return; + } + if (after(tp->snd_nxt, tp->high_seq) && + (flag & FLAG_DATA_SACKED || is_dupack)) { + tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { + tp->high_seq = tp->snd_nxt; + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; /* Step 2.b */ + tp->frto = 0; + } + } + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ icsk->icsk_retransmits = 0; tcp_try_undo_recovery(sk); return; } - if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; - if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) - tcp_reset_reno_sack(tp); - if (tcp_try_undo_loss(sk)) + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. + */ + if (after(tp->snd_nxt, tp->high_seq) && is_dupack) + tcp_add_reno_sack(sk); + else if (flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); + } + if (tcp_try_undo_loss(sk, false)) return; - tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); } @@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: - tcp_process_loss(sk, flag); + tcp_process_loss(sk, flag, is_dupack); if (icsk->icsk_ca_state != TCP_CA_Open) return; /* Fall through to processing in Open state. */ @@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) -- cgit v1.2.3-70-g09d2 From eaaa31392690c7609f7afeec5ba38a79d009842d Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 21 Mar 2013 20:33:48 +0400 Subject: netlink: Diag core and basic socket info dumping (v2) The netlink_diag can be built as a module, just like it's done in unix sockets. The core dumping message carries the basic info about netlink sockets: family, type and protocol, portis, dst_group, dst_portid, state. Groups can be received as an optional parameter NETLINK_DIAG_GROUPS. Netlink sockets cab be filtered by protocols. The socket inode number and cookie is reserved for future per-socket info retrieving. The per-protocol filtering is also reserved for future by requiring the sdiag_protocol to be zero. The file /proc/net/netlink doesn't provide enough information for dumping netlink sockets. It doesn't provide dst_group, dst_portid, groups above 32. v2: fix NETLINK_DIAG_MAX. Now it's equal to the last constant. Acked-by: Pavel Emelyanov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Pablo Neira Ayuso Cc: "Eric W. 
Biederman" Cc: Gao feng Cc: Thomas Graf Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- include/uapi/linux/netlink_diag.h | 42 +++++++++ net/Kconfig | 1 + net/netlink/Kconfig | 10 ++ net/netlink/Makefile | 3 + net/netlink/diag.c | 188 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 244 insertions(+) create mode 100644 include/uapi/linux/netlink_diag.h create mode 100644 net/netlink/Kconfig create mode 100644 net/netlink/diag.c (limited to 'include') diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h new file mode 100644 index 00000000000..88009a31cd0 --- /dev/null +++ b/include/uapi/linux/netlink_diag.h @@ -0,0 +1,42 @@ +#ifndef __NETLINK_DIAG_H__ +#define __NETLINK_DIAG_H__ + +#include + +struct netlink_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 ndiag_ino; + __u32 ndiag_show; + __u32 ndiag_cookie[2]; +}; + +struct netlink_diag_msg { + __u8 ndiag_family; + __u8 ndiag_type; + __u8 ndiag_protocol; + __u8 ndiag_state; + + __u32 ndiag_portid; + __u32 ndiag_dst_portid; + __u32 ndiag_dst_group; + __u32 ndiag_ino; + __u32 ndiag_cookie[2]; +}; + +enum { + NETLINK_DIAG_MEMINFO, + NETLINK_DIAG_GROUPS, + + __NETLINK_DIAG_MAX, +}; + +#define NETLINK_DIAG_MAX (__NETLINK_DIAG_MAX - 1) + +#define NDIAG_PROTO_ALL ((__u8) ~0) + +#define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ +#define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ + +#endif diff --git a/net/Kconfig b/net/Kconfig index 6f676ab885b..2ddc9046868 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" +source "net/netlink/Kconfig" config RPS boolean diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig new file mode 100644 index 00000000000..5d6e8c05b3d --- /dev/null +++ b/net/netlink/Kconfig @@ -0,0 +1,10 @@ +# +# Netlink Sockets +# + +config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" + default n + ---help--- + Support for NETLINK socket monitoring interface used by the ss tool. + If unsure, say Y. 
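For illustration only (a sketch, not part of this patch): a consumer such as the ss tool would drive this interface by opening a NETLINK_SOCK_DIAG socket and sending a SOCK_DIAG_BY_FAMILY request whose payload is the struct netlink_diag_req defined above. A minimal dump of all netlink sockets might look roughly like the program below; it assumes the UAPI headers added by this patch are installed, and it deliberately skips multi-part reply handling and most error checking.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/netlink_diag.h>

int main(void)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct netlink_diag_req req;
	} msg;
	struct nlmsghdr *h;
	char buf[8192];
	int fd, len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (fd < 0)
		return 1;

	memset(&msg, 0, sizeof(msg));
	msg.nlh.nlmsg_len = sizeof(msg);
	msg.nlh.nlmsg_type = SOCK_DIAG_BY_FAMILY;
	msg.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	msg.req.sdiag_family = AF_NETLINK;
	msg.req.sdiag_protocol = NDIAG_PROTO_ALL; /* dump every netlink protocol */
	msg.req.ndiag_show = NDIAG_SHOW_MEMINFO | NDIAG_SHOW_GROUPS;

	if (sendto(fd, &msg, sizeof(msg), 0,
		   (struct sockaddr *)&nladdr, sizeof(nladdr)) != sizeof(msg))
		return 1;

	/* Single recv() only; a real client loops until NLMSG_DONE. */
	len = recv(fd, buf, sizeof(buf), 0);
	for (h = (struct nlmsghdr *)buf; len > 0 && NLMSG_OK(h, len);
	     h = NLMSG_NEXT(h, len)) {
		struct netlink_diag_msg *d = NLMSG_DATA(h);

		if (h->nlmsg_type == NLMSG_DONE || h->nlmsg_type == NLMSG_ERROR)
			break;
		printf("proto %u portid %u dst_portid %u state %u\n",
		       d->ndiag_protocol, d->ndiag_portid,
		       d->ndiag_dst_portid, d->ndiag_state);
	}

	close(fd);
	return 0;
}

Setting sdiag_protocol to NDIAG_PROTO_ALL requests a walk over every netlink protocol table; a specific protocol number below MAX_LINKS restricts the dump to that protocol, matching the dispatch in netlink_diag_dump() above.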
diff --git a/net/netlink/Makefile b/net/netlink/Makefile index bdd6ddf4e95..e837917f6c0 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -3,3 +3,6 @@ # obj-y := af_netlink.o genetlink.o + +obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o +netlink_diag-y := diag.o diff --git a/net/netlink/diag.c b/net/netlink/diag.c new file mode 100644 index 00000000000..5ffb1d1cf40 --- /dev/null +++ b/net/netlink/diag.c @@ -0,0 +1,188 @@ +#include + +#include +#include +#include +#include + +#include "af_netlink.h" + +static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->groups == NULL) + return 0; + + return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups), + nlk->groups); +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct netlink_diag_msg *rep; + struct netlink_sock *nlk = nlk_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->ndiag_family = AF_NETLINK; + rep->ndiag_type = sk->sk_type; + rep->ndiag_protocol = sk->sk_protocol; + rep->ndiag_state = sk->sk_state; + + rep->ndiag_ino = sk_ino; + rep->ndiag_portid = nlk->portid; + rep->ndiag_dst_portid = nlk->dst_portid; + rep->ndiag_dst_group = nlk->dst_group; + sock_diag_save_cookie(sk, rep->ndiag_cookie); + + if ((req->ndiag_show & NDIAG_SHOW_GROUPS) && + sk_diag_dump_groups(sk, skb)) + goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + int protocol, int s_num) +{ + struct netlink_table *tbl = &nl_table[protocol]; + struct nl_portid_hash *hash = &tbl->hash; + struct net *net = sock_net(skb->sk); + struct netlink_diag_req *req; + struct sock *sk; + int ret = 0, num = 0, i; + + req = nlmsg_data(cb->nlh); + + for (i = 0; i <= hash->mask; i++) { + sk_for_each(sk, &hash->table[i]) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + + num++; + } + } + + sk_for_each_bound(sk, &tbl->mc_list) { + if (sk_hashed(sk)) + continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + num++; + } +done: + cb->args[0] = num; + cb->args[1] = protocol; + + return ret; +} + +static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netlink_diag_req *req; + int s_num = cb->args[0]; + + req = nlmsg_data(cb->nlh); + + read_lock(&nl_table_lock); + + if (req->sdiag_protocol == NDIAG_PROTO_ALL) { + int i; + + for (i = cb->args[1]; i < MAX_LINKS; i++) { + if (__netlink_diag_dump(skb, cb, i, s_num)) + break; + s_num = 0; + } + } else { + if (req->sdiag_protocol >= MAX_LINKS) { + read_unlock(&nl_table_lock); + return -ENOENT; + } + + __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + } + + read_unlock(&nl_table_lock); + + return skb->len; +} + +static 
int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct netlink_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = netlink_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler netlink_diag_handler = { + .family = AF_NETLINK, + .dump = netlink_diag_handler_dump, +}; + +static int __init netlink_diag_init(void) +{ + return sock_diag_register(&netlink_diag_handler); +} + +static void __exit netlink_diag_exit(void) +{ + sock_diag_unregister(&netlink_diag_handler); +} + +module_init(netlink_diag_init); +module_exit(netlink_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); -- cgit v1.2.3-70-g09d2 From 79617801ea0c0e6664cb497d4c1892c2ff407364 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Mar 2013 22:22:03 +0100 Subject: filter: bpf_jit_comp: refactor and unify BPF JIT image dump output If bpf_jit_enable > 1, then we dump the emitted JIT compiled image after creation. Currently, only SPARC and PowerPC has similar output as in the reference implementation on x86_64. Make a small helper function in order to reduce duplicated code and make the dump output uniform across architectures x86_64, SPARC, PPC, ARM (e.g. on ARM flen, pass and proglen are currently not shown, but would be interesting to know as well), also for future BPF JIT implementations on other archs. Cc: Mircea Gherzan Cc: Matt Evans Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 5 ++--- arch/powerpc/net/bpf_jit_comp.c | 12 ++++-------- arch/sparc/net/bpf_jit_comp.c | 6 +----- arch/x86/net/bpf_jit_comp.c | 9 ++------- include/linux/filter.h | 10 ++++++++++ 5 files changed, 19 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a0bd8a755bd..1a643ee8e08 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -918,9 +918,8 @@ void bpf_jit_compile(struct sk_filter *fp) #endif if (bpf_jit_enable > 1) - print_hex_dump(KERN_INFO, "BPF JIT code: ", - DUMP_PREFIX_ADDRESS, 16, 4, ctx.target, - alloc_size, false); + /* there are 2 passes here */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; out: diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e834f1ec23c..c427ae36374 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -671,16 +671,12 @@ void bpf_jit_compile(struct sk_filter *fp) } if (bpf_jit_enable > 1) - pr_info("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + /* Note that we output the base address of the code_base + * rather than image, since opcodes are in code_base. 
+ */ + bpf_jit_dump(flen, proglen, pass, code_base); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", - DUMP_PREFIX_ADDRESS, - 16, 1, code_base, - proglen, false); - bpf_flush_icache(code_base, code_base + (proglen/4)); /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 3109ca684a9..d36a85ebb5e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -795,13 +795,9 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; } if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3cbe45381bb..f66b54086ce 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -725,17 +725,12 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; } oldproglen = proglen; } + if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); - bpf_flush_icache(image, image + proglen); - fp->bpf_func = (void *)image; } out: diff --git a/include/linux/filter.h b/include/linux/filter.h index d2059cb4e46..d7d25083130 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -50,6 +50,16 @@ extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, uns #ifdef CONFIG_BPF_JIT extern void bpf_jit_compile(struct sk_filter *fp); extern void bpf_jit_free(struct sk_filter *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%p\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); +} #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else static inline void bpf_jit_compile(struct sk_filter *fp) -- cgit v1.2.3-70-g09d2 From 19dde0bd71e3dffb03ddc509019e22250f4e20c0 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 21 Mar 2013 15:47:54 +0100 Subject: cfg80211: add P2P Notice of Absence attribute Add P2P Notice of Absence attribute structure. 
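As a rough usage sketch only (not part of this patch): once a driver has obtained a filled-in attribute, the combined Opportunistic PS / CTWindow byte and the descriptor array could be decoded along the lines below. The structure layout, IEEE80211_P2P_NOA_DESC_MAX and the two OPPPS macros are the ones added in this change; the helper name and the debug output are hypothetical.

#include <linux/ieee80211.h>
#include <linux/printk.h>

/* Illustrative sketch only; the function name is made up for this example. */
static void example_decode_noa(const struct ieee80211_p2p_noa_attr *noa)
{
	bool oppps = noa->oppps_ctwindow & IEEE80211_P2P_OPPPS_ENABLE_BIT;
	u8 ctwindow = noa->oppps_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
	int i;

	pr_debug("NoA index %u, oppps %d, ctwindow %u\n",
		 noa->index, oppps, ctwindow);

	for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) {
		const struct ieee80211_p2p_noa_desc *desc = &noa->desc[i];

		if (!desc->count)
			continue;	/* unused descriptor slot */

		pr_debug("desc %d: count %u duration %u interval %u start %u\n",
			 i, desc->count, le32_to_cpu(desc->duration),
			 le32_to_cpu(desc->interval),
			 le32_to_cpu(desc->start_time));
	}
}
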
Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4cf0c9e4dd9..d10b5bba326 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1027,6 +1027,26 @@ enum ieee80211_p2p_attr_id { IEEE80211_P2P_ATTR_MAX }; +/* Notice of Absence attribute - described in P2P spec 4.1.14 */ +/* Typical max value used here */ +#define IEEE80211_P2P_NOA_DESC_MAX 4 + +struct ieee80211_p2p_noa_desc { + u8 count; + __le32 duration; + __le32 interval; + __le32 start_time; +} __packed; + +struct ieee80211_p2p_noa_attr { + u8 index; + u8 oppps_ctwindow; + struct ieee80211_p2p_noa_desc desc[IEEE80211_P2P_NOA_DESC_MAX]; +} __packed; + +#define IEEE80211_P2P_OPPPS_ENABLE_BIT BIT(7) +#define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F + /** * struct ieee80211_bar - HT Block Ack Request * -- cgit v1.2.3-70-g09d2 From 67baf66339f82b5ddef5731caedb1e6db496818d Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 21 Mar 2013 15:47:56 +0100 Subject: mac80211: add P2P NoA settings Add P2P NoA settings for STA mode. Signed-off-by: Janusz Dziedzic [fix docs] Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 12 ++++++-- include/net/mac80211.h | 6 ++-- net/mac80211/cfg.c | 23 +++++++++++---- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 45 ++++++++++++++++------------- net/mac80211/trace.h | 6 ++-- 6 files changed, 57 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 341dbc0237e..1d20287b112 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -662,6 +662,7 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_client(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; + struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr; WARN_ON(vif->type != NL80211_IFTYPE_STATION || !vif->p2p); @@ -671,7 +672,8 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_client(struct iwl_mvm *mvm, /* Fill the data specific for station mode */ iwl_mvm_mac_ctxt_cmd_fill_sta(mvm, vif, &cmd.p2p_sta.sta); - cmd.p2p_sta.ctwin = cpu_to_le32(vif->bss_conf.p2p_ctwindow); + cmd.p2p_sta.ctwin = cpu_to_le32(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_CTWINDOW_MASK); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } @@ -892,6 +894,7 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; + struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr; WARN_ON(vif->type != NL80211_IFTYPE_AP || !vif->p2p); @@ -901,8 +904,11 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, /* Fill the data specific for GO mode */ iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap); - cmd.go.ctwin = cpu_to_le32(vif->bss_conf.p2p_ctwindow); - cmd.go.opp_ps_enabled = cpu_to_le32(!!vif->bss_conf.p2p_oppps); + cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_CTWINDOW_MASK); + cmd.go.opp_ps_enabled = + cpu_to_le32(!!(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_ENABLE_BIT)); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd73b8c6746..9b536172e27 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -330,8 +330,7 @@ enum ieee80211_rssi_event { * @ssid_len: Length of SSID given in @ssid. 
* @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm - * @p2p_ctwindow: P2P CTWindow, only for P2P client interfaces - * @p2p_oppps: P2P opportunistic PS is enabled + * @p2p_noa_attr: P2P NoA attribute for P2P powersave */ struct ieee80211_bss_conf { const u8 *bssid; @@ -365,8 +364,7 @@ struct ieee80211_bss_conf { size_t ssid_len; bool hidden_ssid; int txpower; - u8 p2p_ctwindow; - bool p2p_oppps; + struct ieee80211_p2p_noa_attr p2p_noa_attr; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e5c1441ac2b..50aaf25d473 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -965,8 +965,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow = + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + if (params->p2p_opp_ps) + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; err = ieee80211_assign_beacon(sdata, ¶ms->beacon); if (err < 0) @@ -1961,12 +1966,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (params->p2p_ctwindow >= 0) { - sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } - if (params->p2p_opp_ps >= 0) { - sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; + if (params->p2p_opp_ps > 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= + IEEE80211_P2P_OPPPS_ENABLE_BIT; + changed |= BSS_CHANGED_P2P_PS; + } else if (params->p2p_opp_ps == 0) { + sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= + ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ae2d1754b79..55155e3b9b2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -442,7 +442,7 @@ struct ieee80211_if_managed { u8 use_4addr; - u8 p2p_noa_index; + s16 p2p_noa_index; /* Signal strength from the last Beacon frame in the current BSS. 
*/ int last_beacon_signal; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f9258707b15..8b3e852d603 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1661,20 +1661,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ies = rcu_dereference(cbss->ies); if (ies) { - struct ieee80211_p2p_noa_attr noa; int ret; ret = cfg80211_get_p2p_attr( ies->data, ies->len, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, - (u8 *) &noa, sizeof(noa)); + (u8 *) &bss_conf->p2p_noa_attr, + sizeof(bss_conf->p2p_noa_attr)); if (ret >= 2) { - bss_conf->p2p_oppps = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_ENABLE_BIT; - bss_conf->p2p_ctwindow = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + sdata->u.mgd.p2p_noa_index = + bss_conf->p2p_noa_attr.index; bss_info_changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa.index; } } rcu_read_unlock(); @@ -1799,8 +1796,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ASSOC; sdata->vif.bss_conf.assoc = false; - sdata->vif.bss_conf.p2p_ctwindow = 0; - sdata->vif.bss_conf.p2p_oppps = false; + ifmgd->p2p_noa_index = -1; + memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, + sizeof(sdata->vif.bss_conf.p2p_noa_attr)); /* on the next assoc, re-program HT/VHT parameters */ memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); @@ -2963,24 +2961,30 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.p2p) { - struct ieee80211_p2p_noa_attr noa; + struct ieee80211_p2p_noa_attr noa = {}; int ret; ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable, len - baselen, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, (u8 *) &noa, sizeof(noa)); - if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa.index) { - bss_conf->p2p_oppps = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_ENABLE_BIT; - bss_conf->p2p_ctwindow = noa.oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK; + if (ret >= 2) { + if (sdata->u.mgd.p2p_noa_index != noa.index) { + /* valid noa_attr and index changed */ + sdata->u.mgd.p2p_noa_index = noa.index; + memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa)); + changed |= BSS_CHANGED_P2P_PS; + /* + * make sure we update all information, the CRC + * mechanism doesn't look at P2P attributes. + */ + ifmgd->beacon_crc_valid = false; + } + } else if (sdata->u.mgd.p2p_noa_index != -1) { + /* noa_attr not found and we had valid noa_attr before */ + sdata->u.mgd.p2p_noa_index = -1; + memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr)); changed |= BSS_CHANGED_P2P_PS; - sdata->u.mgd.p2p_noa_index = noa.index; - /* - * make sure we update all information, the CRC - * mechanism doesn't look at P2P attributes. 
- */ ifmgd->beacon_crc_valid = false; } } @@ -3523,6 +3527,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->powersave = sdata->wdev.ps; ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + ifmgd->p2p_noa_index = -1; mutex_init(&ifmgd->mtx); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c5899797a8d..d79e374e129 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -359,8 +359,7 @@ TRACE_EVENT(drv_bss_info_changed, __dynamic_array(u8, ssid, info->ssid_len); __field(bool, hidden_ssid); __field(int, txpower) - __field(u8, p2p_ctwindow) - __field(bool, p2p_oppps) + __field(u8, p2p_oppps_ctwindow) ), TP_fast_assign( @@ -400,8 +399,7 @@ TRACE_EVENT(drv_bss_info_changed, memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len); __entry->hidden_ssid = info->hidden_ssid; __entry->txpower = info->txpower; - __entry->p2p_ctwindow = info->p2p_ctwindow; - __entry->p2p_oppps = info->p2p_oppps; + __entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow; ), TP_printk( -- cgit v1.2.3-70-g09d2 From 9d0ca6ed6f2f12eb488f450d5d38d047aa402a53 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 21 Mar 2013 14:17:34 +0000 Subject: virtio: remove obsolete virtqueue_get_queue_index() You can access it directly now, since 3.8: v3.7-rc1-13-g06ca287 'virtio: move queue_index and num_free fields into core struct virtqueue.' Cc: Cornelia Huck Signed-off-by: Rusty Russell Acked-by: Cornelia Huck Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- drivers/s390/kvm/virtio_ccw.c | 6 +++--- include/linux/virtio.h | 6 ------ 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 57ac4b0294b..f7d67e8eb1a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -154,7 +154,7 @@ struct padded_vnet_hdr { */ static int vq2txq(struct virtqueue *vq) { - return (virtqueue_get_queue_index(vq) - 1) / 2; + return (vq->index - 1) / 2; } static int txq2vq(int txq) @@ -164,7 +164,7 @@ static int txq2vq(int txq) static int vq2rxq(struct virtqueue *vq) { - return virtqueue_get_queue_index(vq) / 2; + return vq->index / 2; } static int rxq2vq(int rxq) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 2029b6caa59..fb877b59ec5 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -166,7 +166,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, virtqueue_get_queue_index(vq)); + do_kvm_notify(schid, vq->index); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, @@ -188,7 +188,7 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) unsigned long flags; unsigned long size; int ret; - unsigned int index = virtqueue_get_queue_index(vq); + unsigned int index = vq->index; /* Remove from our list. 
*/ spin_lock_irqsave(&vcdev->lock, flags); @@ -610,7 +610,7 @@ static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev, vq = NULL; spin_lock_irqsave(&vcdev->lock, flags); list_for_each_entry(info, &vcdev->virtqueues, node) { - if (virtqueue_get_queue_index(info->vq) == index) { + if (info->vq->index == index) { vq = info->vq; break; } diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ff6714e6d0f..2d7a5e04590 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -58,12 +58,6 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); unsigned int virtqueue_get_vring_size(struct virtqueue *vq); -/* FIXME: Obsolete accessor, but required for virtio_net merge. */ -static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) -{ - return vq->index; -} - /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus -- cgit v1.2.3-70-g09d2 From c3a07134e6aa5b93a37f72ffa3d11fadf72bf757 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Mar 2013 03:39:28 +0000 Subject: mv643xx_eth: convert to use the Marvell Orion MDIO driver This patch converts the Marvell MV643XX ethernet driver to use the Marvell Orion MDIO driver. As a result, PowerPC and ARM platforms registering the Marvell MV643XX ethernet driver are also updated to register a Marvell Orion MDIO driver. This driver voluntarily overlaps with the Marvell Ethernet shared registers because it will use a subset of this shared register (shared_base + 0x4 to shared_base + 0x84). The Ethernet driver is also updated to look up for a PHY device using the Orion MDIO bus driver. For ARM and PowerPC we register a single instance of the "mvmdio" driver in the system like it used to be done with the use of the "shared_smi" platform_data cookie on ARM. Note that it is safe to register the mvmdio driver only for the "ge00" instance of the driver because this "ge00" interface is guaranteed to always be explicitely registered by consumers of arch/arm/plat-orion/common.c and other instances (ge01, ge10 and ge11) were all pointing their shared_smi to ge00. For PowerPC the in-tree Device Tree Source files mention only one MV643XX ethernet MAC instance so the MDIO bus driver is registered only when id == 0. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- arch/arm/plat-orion/common.c | 54 ++++---- arch/powerpc/platforms/chrp/pegasos_eth.c | 20 +++ arch/powerpc/sysdev/mv64x60_dev.c | 16 ++- drivers/net/ethernet/marvell/Kconfig | 5 +- drivers/net/ethernet/marvell/Makefile | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 195 +++-------------------------- include/linux/mv643xx_eth.h | 1 - 7 files changed, 84 insertions(+), 209 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 2d4b6414609..251f827271e 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -238,6 +238,7 @@ static __init void ge_complete( struct mv643xx_eth_shared_platform_data *orion_ge_shared_data, struct resource *orion_ge_resource, unsigned long irq, struct platform_device *orion_ge_shared, + struct platform_device *orion_ge_mvmdio, struct mv643xx_eth_platform_data *eth_data, struct platform_device *orion_ge) { @@ -247,6 +248,8 @@ static __init void ge_complete( orion_ge->dev.platform_data = eth_data; platform_device_register(orion_ge_shared); + if (orion_ge_mvmdio) + platform_device_register(orion_ge_mvmdio); platform_device_register(orion_ge); } @@ -258,8 +261,6 @@ struct mv643xx_eth_shared_platform_data orion_ge00_shared_data; static struct resource orion_ge00_shared_resources[] = { { .name = "ge00 base", - }, { - .name = "ge00 err irq", }, }; @@ -271,6 +272,19 @@ static struct platform_device orion_ge00_shared = { }, }; +static struct resource orion_ge_mvmdio_resources[] = { + { + .name = "ge00 mvmdio base", + }, { + .name = "ge00 mvmdio err irq", + }, +}; + +static struct platform_device orion_ge_mvmdio = { + .name = "orion-mdio", + .id = -1, +}; + static struct resource orion_ge00_resources[] = { { .name = "ge00 irq", @@ -295,26 +309,25 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); + fill_resources(&orion_ge_mvmdio, orion_ge_mvmdio_resources, + mapbase + 0x2004, 0x84 - 1, irq_err); orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge00_shared_data, orion_ge00_resources, irq, &orion_ge00_shared, + &orion_ge_mvmdio, eth_data, &orion_ge00); } /***************************************************************************** * GE01 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge01_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge01_shared_data; static struct resource orion_ge01_shared_resources[] = { { .name = "ge01 base", - }, { - .name = "ge01 err irq", - }, + } }; static struct platform_device orion_ge01_shared = { @@ -349,26 +362,23 @@ void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge01_shared_data, orion_ge01_resources, irq, &orion_ge01_shared, + NULL, eth_data, &orion_ge01); } /***************************************************************************** * GE10 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge10_shared_data = { - .shared_smi = &orion_ge00_shared, -}; 
+struct mv643xx_eth_shared_platform_data orion_ge10_shared_data; static struct resource orion_ge10_shared_resources[] = { { .name = "ge10 base", - }, { - .name = "ge10 err irq", - }, + } }; static struct platform_device orion_ge10_shared = { @@ -402,24 +412,21 @@ void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge10_shared, orion_ge10_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge10_shared_data, orion_ge10_resources, irq, &orion_ge10_shared, + NULL, eth_data, &orion_ge10); } /***************************************************************************** * GE11 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge11_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge11_shared_data; static struct resource orion_ge11_shared_resources[] = { { .name = "ge11 base", - }, { - .name = "ge11 err irq", }, }; @@ -454,9 +461,10 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge11_shared, orion_ge11_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge11_shared_data, orion_ge11_resources, irq, &orion_ge11_shared, + NULL, eth_data, &orion_ge11); } diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 039fc8e8219..2b4dc6abde6 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -47,6 +47,25 @@ static struct platform_device mv643xx_eth_shared_device = { .resource = mv643xx_eth_shared_resources, }; +/* + * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1 + */ +static struct resource mv643xx_eth_mvmdio_resources[] = { + [0] = { + .name = "ethernet mdio base", + .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x4, + .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x83, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device mv643xx_eth_mvmdio_device = { + .name = "orion-mdio", + .id = -1, + .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources), + .resource = mv643xx_eth_shared_resources, +}; + static struct resource mv643xx_eth_port1_resources[] = { [0] = { .name = "eth port1 irq", @@ -82,6 +101,7 @@ static struct platform_device eth_port1_device = { static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { &mv643xx_eth_shared_device, + &mv643xx_eth_mvmdio_device, ð_port1_device, }; diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 0f6af41ebb4..4a25c26f0bf 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -214,15 +214,27 @@ static struct platform_device * __init mv64x60_eth_register_shared_pdev( struct device_node *np, int id) { struct platform_device *pdev; - struct resource r[1]; + struct resource r[2]; int err; err = of_address_to_resource(np, 0, &r[0]); if (err) return ERR_PTR(err); + /* register an orion mdio bus driver */ + r[1].start = r[0].start + 0x4; + r[1].end = r[0].start + 0x84 - 1; + r[1].flags = IORESOURCE_MEM; + + if (id == 0) { + pdev = platform_device_register_simple("orion-mdio", -1, &r[1], 1); + if (!pdev) + return pdev; + } + pdev = platform_device_register_simple(MV643XX_ETH_SHARED_NAME, id, - r, 1); + &r[0], 1); + return pdev; } diff --git 
a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index edfba937092..5170ecb00ac 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -23,6 +23,7 @@ config MV643XX_ETH depends on (MV64X60 || PPC32 || PLAT_ORION) && INET select INET_LRO select PHYLIB + select MVMDIO ---help--- This driver supports the gigabit ethernet MACs in the Marvell Discovery PPC/MIPS chipset family (MV643XX) and @@ -38,9 +39,7 @@ config MVMDIO interface units of the Marvell EBU SoCs (Kirkwood, Orion5x, Dove, Armada 370 and Armada XP). - For now, this driver is only needed for the MVNETA driver - (used on Armada 370 and XP), but it could be used in the - future by the MV643XX_ETH driver. + This driver is used by the MV643XX_ETH and MVNETA drivers. config MVNETA tristate "Marvell Armada 370/XP network interface support" diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 7f63b4aac43..5c4a7765ff0 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -2,8 +2,8 @@ # Makefile for the Marvell device drivers. # -obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVMDIO) += mvmdio.o +obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVNETA) += mvneta.o obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d1ecf4bf7da..a65a92ef19e 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -69,14 +69,6 @@ static char mv643xx_eth_driver_version[] = "1.4"; * Registers shared between all ports. */ #define PHY_ADDR 0x0000 -#define SMI_REG 0x0004 -#define SMI_BUSY 0x10000000 -#define SMI_READ_VALID 0x08000000 -#define SMI_OPCODE_READ 0x04000000 -#define SMI_OPCODE_WRITE 0x00000000 -#define ERR_INT_CAUSE 0x0080 -#define ERR_INT_SMI_DONE 0x00000010 -#define ERR_INT_MASK 0x0084 #define WINDOW_BASE(w) (0x0200 + ((w) << 3)) #define WINDOW_SIZE(w) (0x0204 + ((w) << 3)) #define WINDOW_REMAP_HIGH(w) (0x0280 + ((w) << 2)) @@ -265,25 +257,6 @@ struct mv643xx_eth_shared_private { */ void __iomem *base; - /* - * Points at the right SMI instance to use. - */ - struct mv643xx_eth_shared_private *smi; - - /* - * Provides access to local SMI interface. - */ - struct mii_bus *smi_bus; - - /* - * If we have access to the error interrupt pin (which is - * somewhat misnamed as it not only reflects internal errors - * but also reflects SMI completion), use that to wait for - * SMI access completion instead of polling the SMI busy bit. - */ - int err_interrupt; - wait_queue_head_t smi_busy_wait; - /* * Per-port MBUS window access register value. 
*/ @@ -1122,97 +1095,6 @@ out_write: wrlp(mp, PORT_SERIAL_CONTROL, pscr); } -static irqreturn_t mv643xx_eth_err_irq(int irq, void *dev_id) -{ - struct mv643xx_eth_shared_private *msp = dev_id; - - if (readl(msp->base + ERR_INT_CAUSE) & ERR_INT_SMI_DONE) { - writel(~ERR_INT_SMI_DONE, msp->base + ERR_INT_CAUSE); - wake_up(&msp->smi_busy_wait); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - -static int smi_is_done(struct mv643xx_eth_shared_private *msp) -{ - return !(readl(msp->base + SMI_REG) & SMI_BUSY); -} - -static int smi_wait_ready(struct mv643xx_eth_shared_private *msp) -{ - if (msp->err_interrupt == NO_IRQ) { - int i; - - for (i = 0; !smi_is_done(msp); i++) { - if (i == 10) - return -ETIMEDOUT; - msleep(10); - } - - return 0; - } - - if (!smi_is_done(msp)) { - wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), - msecs_to_jiffies(100)); - if (!smi_is_done(msp)) - return -ETIMEDOUT; - } - - return 0; -} - -static int smi_bus_read(struct mii_bus *bus, int addr, int reg) -{ - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; - int ret; - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - writel(SMI_OPCODE_READ | (reg << 21) | (addr << 16), smi_reg); - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - ret = readl(smi_reg); - if (!(ret & SMI_READ_VALID)) { - pr_warn("SMI bus read not valid\n"); - return -ENODEV; - } - - return ret & 0xffff; -} - -static int smi_bus_write(struct mii_bus *bus, int addr, int reg, u16 val) -{ - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - writel(SMI_OPCODE_WRITE | (reg << 21) | - (addr << 16) | (val & 0xffff), smi_reg); - - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - return 0; -} - - /* statistics ***************************************************************/ static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *dev) { @@ -2687,47 +2569,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) if (msp->base == NULL) goto out_free; - /* - * Set up and register SMI bus. - */ - if (pd == NULL || pd->shared_smi == NULL) { - msp->smi_bus = mdiobus_alloc(); - if (msp->smi_bus == NULL) - goto out_unmap; - - msp->smi_bus->priv = msp; - msp->smi_bus->name = "mv643xx_eth smi"; - msp->smi_bus->read = smi_bus_read; - msp->smi_bus->write = smi_bus_write, - snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d", - pdev->name, pdev->id); - msp->smi_bus->parent = &pdev->dev; - msp->smi_bus->phy_mask = 0xffffffff; - if (mdiobus_register(msp->smi_bus) < 0) - goto out_free_mii_bus; - msp->smi = msp; - } else { - msp->smi = platform_get_drvdata(pd->shared_smi); - } - - msp->err_interrupt = NO_IRQ; - init_waitqueue_head(&msp->smi_busy_wait); - - /* - * Check whether the error interrupt is hooked up. - */ - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res != NULL) { - int err; - - err = request_irq(res->start, mv643xx_eth_err_irq, - IRQF_SHARED, "mv643xx_eth", msp); - if (!err) { - writel(ERR_INT_SMI_DONE, msp->base + ERR_INT_MASK); - msp->err_interrupt = res->start; - } - } - /* * (Re-)program MBUS remapping windows if we are asked to. 
*/ @@ -2743,10 +2584,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) return 0; -out_free_mii_bus: - mdiobus_free(msp->smi_bus); -out_unmap: - iounmap(msp->base); out_free: kfree(msp); out: @@ -2756,14 +2593,7 @@ out: static int mv643xx_eth_shared_remove(struct platform_device *pdev) { struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev); - struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data; - if (pd == NULL || pd->shared_smi == NULL) { - mdiobus_unregister(msp->smi_bus); - mdiobus_free(msp->smi_bus); - } - if (msp->err_interrupt != NO_IRQ) - free_irq(msp->err_interrupt, msp); iounmap(msp->base); kfree(msp); @@ -2826,14 +2656,21 @@ static void set_params(struct mv643xx_eth_private *mp, mp->txq_count = pd->tx_queue_count ? : 1; } +static void mv643xx_eth_adjust_link(struct net_device *dev) +{ + struct mv643xx_eth_private *mp = netdev_priv(dev); + + mv643xx_adjust_pscr(mp); +} + static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, int phy_addr) { - struct mii_bus *bus = mp->shared->smi->smi_bus; struct phy_device *phydev; int start; int num; int i; + char phy_id[MII_BUS_ID_SIZE + 3]; if (phy_addr == MV643XX_ETH_PHY_ADDR_DEFAULT) { start = phy_addr_get(mp) & 0x1f; @@ -2843,17 +2680,19 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, num = 1; } + /* Attempt to connect to the PHY using orion-mdio */ phydev = NULL; for (i = 0; i < num; i++) { int addr = (start + i) & 0x1f; - if (bus->phy_map[addr] == NULL) - mdiobus_scan(bus, addr); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, + "orion-mdio-mii", addr); - if (phydev == NULL) { - phydev = bus->phy_map[addr]; - if (phydev != NULL) - phy_addr_set(mp, addr); + phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link, + PHY_INTERFACE_MODE_GMII); + if (!IS_ERR(phydev)) { + phy_addr_set(mp, addr); + break; } } @@ -2866,8 +2705,6 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) phy_reset(mp); - phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII); - if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; phy->speed = 0; diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 49258e0ed1c..141d395bbb5 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -19,7 +19,6 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; - struct platform_device *shared_smi; /* * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default * limit of 9KiB will be used. -- cgit v1.2.3-70-g09d2 From 58d7d8f9b20ee6f883532b952f246e4289fe06eb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 21 Mar 2013 07:45:28 +0000 Subject: decnet: Parse netlink attributes on our own decnet is the only subsystem left that is relying on the global netlink attribute buffer rta_buf. It's horrible design and we want to get rid of it. This converts all of decnet to do implicit attribute parsing. It also gets rid of the error prone struct dn_kern_rta. Yes, the fib_magic() stuff is not pretty. It's compiled tested but I need someone with appropriate hardware to test the patch since I don't have access to it. Cc: linux-decnet-user@lists.sourceforge.net Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/net/dn_fib.h | 28 ++----- net/decnet/dn_fib.c | 211 +++++++++++++++++++++++++++----------------------- net/decnet/dn_route.c | 27 ++++--- net/decnet/dn_table.c | 42 +++++----- 4 files changed, 160 insertions(+), 148 deletions(-) (limited to 'include') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 1ee9d4bda30..74004af31c4 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -1,24 +1,9 @@ #ifndef _NET_DN_FIB_H #define _NET_DN_FIB_H -/* WARNING: The ordering of these elements must match ordering - * of RTA_* rtnetlink attribute numbers. - */ -struct dn_kern_rta { - void *rta_dst; - void *rta_src; - int *rta_iif; - int *rta_oif; - void *rta_gw; - u32 *rta_priority; - void *rta_prefsrc; - struct rtattr *rta_mx; - struct rtattr *rta_mp; - unsigned char *rta_protoinfo; - u32 *rta_flow; - struct rta_cacheinfo *rta_ci; - struct rta_session *rta_sess; -}; +#include + +extern const struct nla_policy rtm_dn_policy[]; struct dn_fib_res { struct fib_rule *r; @@ -93,10 +78,10 @@ struct dn_fib_table { u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*delete)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld, struct dn_fib_res *res); @@ -116,13 +101,12 @@ extern void dn_fib_cleanup(void); extern int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); extern struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, - struct dn_kern_rta *rta, + struct nlattr *attrs[], const struct nlmsghdr *nlh, int *errp); extern int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res); extern void dn_fib_release_info(struct dn_fib_info *fi); -extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614eccc0..42a8048fe72 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi return NULL; } -__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr) { - while(RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(__le16*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - - return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ - int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhs = 0, nhlen = nla_len(attr); while(nhlen >= (int)sizeof(struct rtnexthop)) { if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta) return nhs; } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, + const struct rtmsg *r) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhlen = nla_len(attr); change_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ 
static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons nh->nh_weight = nhp->rtnh_hops + 1; if (attrlen) { - nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -268,7 +260,8 @@ out: } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], + const struct nlmsghdr *nlh, int *errp) { int err; struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; - if (rta->rta_mp) { - nhs = dn_fib_count_nhs(rta->rta_mp); - if (nhs == 0) - goto err_inval; - } + if (attrs[RTA_MULTIPATH] && + (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) + goto err_inval; fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - while(RTA_OK(attr, attrlen)) { - unsigned int flavour = attr->rta_type; + if (attrs[RTA_PRIORITY]) + fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + + if (attrs[RTA_METRICS]) { + struct nlattr *attr; + int rem; - if (flavour) { - if (flavour > RTAX_MAX) + nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { + int type = nla_type(attr); + + if (type) { + if (type > RTAX_MAX || nla_len(attr) < 4) goto err_inval; - fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + + fi->fib_metrics[type-1] = nla_get_u32(attr); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2); - if (rta->rta_mp) { - if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) + if (attrs[RTA_PREFSRC]) + fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); + + if (attrs[RTA_MULTIPATH]) { + if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + + if (attrs[RTA_OIF] && + fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + + if (attrs[RTA_GATEWAY] && + fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) goto err_inval; } else { struct dn_fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 2); + + if (attrs[RTA_OIF]) + nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + + if (attrs[RTA_GATEWAY]) + nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); + nh->nh_flags = r->rtm_flags; nh->nh_weight = 1; } if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + + fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); goto link_it; } if (dn_fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || 
attrs[RTA_MULTIPATH]) goto err_inval; + goto link_it; } @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) + if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || + fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -486,29 +489,24 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) spin_unlock_bh(&dn_fib_multipath_lock); } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U16 }, + [RTA_SRC] = { .type = NLA_U16 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U16 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U16 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, +}; -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; - - for(i = 1; i <= RTA_MAX; i++) { - struct rtattr *attr = rta[i-1]; - if (attr) { - if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - rta[i-1] = (struct rtattr *)RTA_DATA(attr); - } - } - - return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) { - if (rta[RTA_TABLE - 1]) - table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); + if (attrs[RTA_TABLE]) + table = nla_get_u32(attrs[RTA_TABLE]); return table; } @@ -517,8 +515,9 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -526,22 +525,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); - if (tb) - return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); + if (!tb) + return -ESRCH; - return -ESRCH; + return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -549,14 +550,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); - if (tb) - return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = 
dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); + if (!tb) + return -ENOBUFS; - return -ENOBUFS; + return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +568,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad struct nlmsghdr nlh; struct rtmsg rtm; } req; - struct dn_kern_rta rta; + struct { + struct nlattr hdr; + __le16 dst; + } dst_attr = { + .dst = dst, + }; + struct { + struct nlattr hdr; + __le16 prefsrc; + } prefsrc_attr = { + .prefsrc = ifa->ifa_local, + }; + struct { + struct nlattr hdr; + u32 oif; + } oif_attr = { + .oif = ifa->ifa_dev->dev->ifindex, + }; + struct nlattr *attrs[RTA_MAX+1] = { + [RTA_DST] = (struct nlattr *) &dst_attr, + [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, + [RTA_OIF] = (struct nlattr *) &oif_attr, + }; memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +614,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); req.rtm.rtm_type = type; - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); else - tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); } static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ac0e153ef8..b4b3508e77f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1619,17 +1619,21 @@ errout: static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(in_skb->sk); - struct rtattr **rta = arg; struct rtmsg *rtm = nlmsg_data(nlh); struct dn_route *rt = NULL; struct dn_skb_cb *cb; int err; struct sk_buff *skb; struct flowidn fld; + struct nlattr *tb[RTA_MAX+1]; if (!net_eq(net, &init_net)) return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; + memset(&fld, 0, sizeof(fld)); fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1643,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb_reset_mac_header(skb); cb = DN_SKB_CB(skb); - if (rta[RTA_SRC-1]) - memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_IIF-1]) - memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_SRC]) + fld.saddr = nla_get_le16(tb[RTA_SRC]); + + if (tb[RTA_DST]) + fld.daddr = nla_get_le16(tb[RTA_DST]); + + if (tb[RTA_IIF]) + fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); if (fld.flowidn_iif) { struct net_device *dev; @@ -1669,10 +1675,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (!err && -rt->dst.error) err = rt->dst.error; } else { - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fld.flowidn_oif = oif; + if (tb[RTA_OIF]) + fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 6c2445bcaba..fc42a0afd30 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -224,26 +224,27 
@@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) { struct rtnexthop *nhp; int nhlen; - if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) + if (attrs[RTA_PRIORITY] && + nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) + if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { + if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && + (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) return 0; return 1; } - if (rta->rta_mp == NULL) + if (!attrs[RTA_MULTIPATH]) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + nhp = nla_data(attrs[RTA_MULTIPATH]); + nhlen = nla_len(attrs[RTA_MULTIPATH]); for_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +255,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; if (attrlen) { - gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + gw = gw_attr ? nla_get_le16(gw_attr) : 0; if (gw && gw != nh->nh_gw) return 1; @@ -517,7 +521,8 @@ out: return skb->len; } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -536,15 +541,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct return -ENOBUFS; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); } - if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) + if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) return err; if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -654,7 +658,8 @@ out: } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash*)tb->data; struct dn_fib_node **fp, **del_fp, *f; @@ -671,9 +676,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct return -ESRCH; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); @@ -703,7 +707,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && - 
dn_fib_nh_match(r, n, rta, fi) == 0) + dn_fib_nh_match(r, n, attrs, fi) == 0) del_fp = fp; } -- cgit v1.2.3-70-g09d2 From 661d2967b3f1b34eeaa7e212e7b9bbe8ee072b59 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 21 Mar 2013 07:45:29 +0000 Subject: rtnetlink: Remove passing of attributes into rtnl_doit functions With decnet converted, we can finally get rid of rta_buf and its computations around it. It also gets rid of the minimal header length verification since all message handlers do that explicitly anyway. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 +- net/bridge/br_mdb.c | 4 +-- net/can/gw.c | 5 ++- net/core/fib_rules.c | 4 +-- net/core/neighbour.c | 6 ++-- net/core/rtnetlink.c | 82 ++++++------------------------------------------- net/dcb/dcbnl.c | 2 +- net/decnet/dn_dev.c | 4 +-- net/decnet/dn_fib.c | 4 +-- net/decnet/dn_route.c | 2 +- net/ipv4/devinet.c | 7 ++--- net/ipv4/fib_frontend.c | 4 +-- net/ipv4/route.c | 2 +- net/ipv6/addrconf.c | 10 +++--- net/ipv6/addrlabel.c | 6 ++-- net/ipv6/route.c | 6 ++-- net/phonet/pn_netlink.c | 4 +-- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 6 ++-- 20 files changed, 47 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 5a15fabd6a7..702664833a5 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -4,7 +4,7 @@ #include #include -typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); +typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ee79f3f2038..19942e38fd2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct br_mdb_entry *entry; @@ -458,7 +458,7 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net_device *dev; struct br_mdb_entry *entry; diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebe..2dc619db805 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; @@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void) } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct cgw_job *gwj = NULL; struct hlist_node *nx; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe..d5a9f8ead0d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 @@ errout: return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); 
@@ -415,7 +415,7 @@ errout: return err; } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3863b8f639c..c72a646d9f4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1613,7 +1613,7 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1677,7 +1677,7 @@ out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1955,7 +1955,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9a9b99e1fb7..751f1244b64 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -515,32 +515,6 @@ out: return err; } -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; @@ -1537,7 +1511,7 @@ errout: return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1578,7 +1552,7 @@ errout: return err; } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1709,7 +1683,7 @@ static int rtnl_group_changelink(struct net *net, int group, return 0; } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1864,7 
+1838,7 @@ out: } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2081,7 +2055,7 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_add); -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2179,7 +2153,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2478,8 +2452,7 @@ errout: return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2549,8 +2522,7 @@ out: return err; } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2620,10 +2592,6 @@ out: return err; } -/* Protected by RTNL sempahore. */ -static struct rtattr **rta_buf; -static int rtattr_max; - /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -2631,7 +2599,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; - int min_len; int family; int type; int err; @@ -2679,32 +2646,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - doit = rtnl_get_doit(family, type); if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh, (void *)&rta_buf[0]); + return doit(skb, nlh); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -2774,16 +2720,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 21291f1abcd..40d5829ed36 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr 
*nlh) { struct net *net = sock_net(skb->sk); struct net_device *netdev; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c8da116d84a..7d9197063eb 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { .len = IFNAMSIZ - 1 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout: return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 42a8048fe72..f0930594993 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -511,7 +511,7 @@ static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; @@ -536,7 +536,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b4b3508e77f..5904429e8d6 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,7 @@ errout: /* * This is called by both endnodes and routers now. 
*/ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm = nlmsg_data(nlh); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index af57bbae05b..20a9f9274f3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -775,7 +775,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) return NULL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; @@ -1730,8 +1730,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb4bb12b3eb..0e74398bc8e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -604,7 +604,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -626,7 +626,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e2851464f8..550781a17b3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2311,7 +2311,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fa36a677490..15794fdaf02 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -544,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -3578,7 +3577,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3644,7 +3643,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3983,8 +3982,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct 
netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 6f226c850c9..f083a583a05 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -530,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e5fe0041adf..ad0aa6b0b86 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2355,7 +2355,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630d306..dc15f430080 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8579c4bb20c..fd7072827a4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -982,7 +982,7 @@ done: return ret; } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 964f5e4f4b8..9a04b981bc1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -118,7 
+118,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c297e2a8e2a..0bbce229ac6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -971,7 +971,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1038,7 +1038,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * Create/change qdisc. */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1372,7 +1372,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); -- cgit v1.2.3-70-g09d2 From 63998ac24f8370caf99e433483532bab8368eb7e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Mar 2013 06:28:43 +0000 Subject: ipv6: provide addr and netconf dump consistency info This patch adds a dev_addr_genid for IPv6. The goal is to use it, combined with dev_base_seq to check if a change occurs during a netlink dump. If a change is detected, the flag NLM_F_DUMP_INTR is set in the first message after the dump was interrupted. Note that only dump of unicast addresses is checked (multicast and anycast are not checked). Reported-by: Junwei Zhang Reported-by: Hongjun Li Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/net/netns/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 1242f371718..005e2c2e39a 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,6 +71,7 @@ struct netns_ipv6 { struct fib_rules_ops *mr6_rules_ops; #endif #endif + atomic_t dev_addr_genid; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 15794fdaf02..1b9f4f25212 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -621,6 +621,8 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ + net->dev_base_seq; hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -638,6 +640,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, rcu_read_unlock(); goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -3874,6 +3877,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, NLM_F_MULTI); if (err <= 0) break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } break; } @@ -3931,6 +3935,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[2]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -4407,6 +4412,8 @@ errout: static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { + struct net *net = dev_net(ifp->idev->dev); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -4432,6 +4439,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_free(&ifp->rt->dst); break; } + atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) -- cgit v1.2.3-70-g09d2 From be991971d53e0f5b6d13e3940192054216590072 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Mar 2013 08:24:37 +0000 Subject: inet: generalize ipv4-only RFC3168 5.3 ecn fragmentation handling for future use by ipv6 This patch just moves some code arround to make the ip4_frag_ecn_table and IPFRAG_ECN_* constants accessible from the other reassembly engines. I also renamed ip4_frag_ecn_table to ip_frag_ecn_table. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/inet_frag.h | 12 ++++++++++++ net/ipv4/inet_fragment.c | 22 ++++++++++++++++++++++ net/ipv4/ip_fragment.c | 31 +------------------------------ 3 files changed, 35 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0a1dcc2fa2f..64b4e7d23b8 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -153,4 +153,16 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, list_add_tail(&q->lru_list, &nf->lru_list); spin_unlock(&nf->lru_lock); } + +/* RFC 3168 support : + * We want to check ECN values of all fragments, do detect invalid combinations. + * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. 
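A short worked example of how the OR-ed value is consumed at reassembly time (example values only; the actual lookups appear in the hunks below):

	u8 ecn_bits = 0, ecn;

	ecn_bits |= IPFRAG_ECN_ECT_0;		/* one fragment arrived as ECT(0) */
	ecn_bits |= IPFRAG_ECN_CE;		/* another arrived as CE */

	ecn = ip_frag_ecn_table[ecn_bits];	/* here: INET_ECN_CE */
	if (ecn == 0xff)			/* e.g. NOT_ECT mixed with ECT: invalid combination */
		goto drop;			/* hypothetical error label: drop the frame */
	/* otherwise ecn is OR-ed into the reassembled header's ECN field */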
+ */ +#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ +#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ +#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ +#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ + +extern const u8 ip_frag_ecn_table[16]; + #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f4fd23de9b1..2bff045bec6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -23,6 +23,28 @@ #include #include +#include + +/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. + * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { + /* at least one fragment had CE, and others ECT_0 or ECT_1 */ + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + + /* invalid combinations : drop frame */ + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table); static void inet_frag_secret_rebuild(unsigned long dummy) { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b843ef..938520668b2 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq { struct inet_peer *peer; }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ -#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ -#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ -#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ - static inline u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. 
- * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { - /* at least one fragment had CE, and others ECT_0 or ECT_1 */ - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - - /* invalid combinations : drop frame */ - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; - static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -551,7 +522,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ipq_kill(qp); - ecn = ip4_frag_ecn_table[qp->ecn]; + ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; -- cgit v1.2.3-70-g09d2 From eec2e6185ff6eab18c2cae9b01a9fbc5c33248fc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Mar 2013 08:24:44 +0000 Subject: ipv6: implement RFC3168 5.3 (ecn protection) for ipv6 fragmentation handling Hello! After patch 1 got accepted to net-next I will also send a patch to netfilter-devel to make the corresponding changes to the netfilter reassembly logic. Thanks, Hannes -- >8 -- [PATCH 2/2] ipv6: implement RFC3168 5.3 (ecn protection) for ipv6 fragmentation handling This patch also ensures that INET_ECN_CE is propagated if one fragment had the codepoint set. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 2 ++ net/ipv6/reassembly.c | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 42ef6abf25c..0810aa57c78 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -478,6 +478,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + u8 ecn; }; void ip6_frag_init(struct inet_frag_queue *q, void *a); @@ -497,6 +498,7 @@ struct frag_queue { int iif; unsigned int csum; __u16 nhoffset; + u8 ecn; }; void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab9347ad..e6e44cef8db 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -58,6 +58,7 @@ #include #include #include +#include struct ip6frag_skb_cb { @@ -67,6 +68,10 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} static struct inet_frags ip6_frags; @@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->user = arg->user; fq->saddr = *arg->src; fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); @@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data) } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); @@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -319,6 +330,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. @@ -362,9 +374,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int payload_len; unsigned int nhoff; int sum_truesize; + u8 ecn; inet_frag_kill(&fq->q, &ip6_frags); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Make the one we just received the head. */ if (prev) { head = prev->next; @@ -463,6 +480,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 
8) */ @@ -526,7 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq != NULL) { int ret; -- cgit v1.2.3-70-g09d2 From 8bf24293453a10a696bcd00abb1248e6365f66c2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Mar 2013 11:15:59 +0200 Subject: cfg80211: Document update_ft_ies() cfg80211_ops This was forgotten from the commit that added support for FT operations with drivers that implement SME. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bdba9b61906..57870b64697 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1998,6 +1998,10 @@ struct cfg80211_update_ft_ies_params { * advertise the support for MAC based ACL have to implement this callback. * * @start_radar_detection: Start radar detection in the driver. + * + * @update_ft_ies: Provide updated Fast BSS Transition information to the + * driver. If the SME is in the driver/firmware, this information can be + * used in building Authentication and Reassociation Request frames. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); -- cgit v1.2.3-70-g09d2 From 219c38674c262378ec411dd8318ebfd199fbce8d Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Tue, 22 Jan 2013 16:52:23 +0200 Subject: mac80211: allow drivers to set default uAPSD parameters mac80211 currently sets uAPSD parameters to have VO AC trigger- and delivery-enabled, with maximum service period length. Allow drivers to change these default settings since different uAPSD client implementations may handle errors differently and be able to recover from some errors. Note: some APs may not function correctly if one or all ACs are trigger- and delivery-enabled, see http://thread.gmane.org/gmane.linux.kernel.wireless.general/93577. We retested with this AP and later firmware doesn't have this bug any more. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ net/mac80211/main.c | 2 ++ net/mac80211/mlme.c | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9b536172e27..23a275a9a3b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1534,6 +1534,17 @@ enum ieee80211_hw_flags { * @netdev_features: netdev features to be set in each netdev created * from this HW. Note only HW checksum features are currently * compatible with mac80211. Other feature bits will be rejected. + * + * @uapsd_queues: This bitmap is included in (re)association frame to indicate + * for each access category if it is uAPSD trigger-enabled and delivery- + * enabled. Use IEEE80211_WMM_IE_STA_QOSINFO_AC_* to set this bitmap. + * Each bit corresponds to different AC. Value '1' in specific bit means + * that corresponding AC is both trigger- and delivery-enabled. '0' means + * neither enabled. + * + * @uapsd_max_sp_len: maximum number of total buffered frames the WMM AP may + * deliver to a WMM STA during any Service Period triggered by the WMM STA. + * Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values. 
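A driver that wants different defaults sets these fields before ieee80211_register_hw(); a minimal sketch (hypothetical driver probe code, not part of this patch):

	/* trigger- and delivery-enable VO and VI only, and limit each
	 * service period to at most two buffered frames */
	hw->uapsd_queues = IEEE80211_WMM_IE_STA_QOSINFO_AC_VO |
			   IEEE80211_WMM_IE_STA_QOSINFO_AC_VI;
	hw->uapsd_max_sp_len = IEEE80211_WMM_IE_STA_QOSINFO_SP_2;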
*/ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1559,6 +1570,8 @@ struct ieee80211_hw { u8 radiotap_mcs_details; u16 radiotap_vht_details; netdev_features_t netdev_features; + u8 uapsd_queues; + u8 uapsd_max_sp_len; }; /** diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c6f81ecc36a..b0d28682186 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -587,6 +587,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, IEEE80211_RADIOTAP_MCS_HAVE_BW; local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; + local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8b3e852d603..9958cb7df8f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3525,8 +3525,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; - ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; - ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues; + ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; mutex_init(&ifmgd->mtx); -- cgit v1.2.3-70-g09d2 From 25c7704d8bdcf6746dab4397953df51759924b37 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Acked-by: Cong Wang Signed-off-by: Pravin B Shelar --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index 0c046e3bca0..483b91a10bb 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -74,15 +74,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. 
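The resulting behaviour in the segmentation path can be sketched as follows, with gso_skb and segs standing in for the original packet and its segment list (condensed, illustrative version of the TCP branch of the loop; the exact change is in the af_inet.c hunk below): identification values stay consecutive across all emitted segments whether or not DF is set.

	u32 id = ntohs(ip_hdr(gso_skb)->id);	/* id of the original GSO packet */
	struct sk_buff *seg;

	for (seg = segs; seg; seg = seg->next) {
		struct iphdr *iph = ip_hdr(seg);

		iph->id = htons(id++);		/* increment even when IP_DF is set */
		iph->tot_len = htons(seg->len - seg->mac_len);
		iph->check = 0;
		iph->check = ip_fast_csum(iph, iph->ihl);
	}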
*/ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e5882caf8a..70b2d4cf580 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1334,8 +1334,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.3-70-g09d2 From 675a0b049abf6edf30f8dd84c5610b6edc2296c8 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 25 Mar 2013 16:26:57 +0100 Subject: mac80211: Use a cfg80211_chan_def in ieee80211_hw_conf_chan Drivers that don't use chanctxes cannot perform VHT association because they still use a "backward compatibility" pair of {ieee80211_channel, nl80211_channel_type} in ieee80211_conf and ieee80211_local. Signed-off-by: Karl Beldan [fix kernel-doc] Signed-off-by: Johannes Berg --- drivers/net/wireless/adm8211.c | 3 +- drivers/net/wireless/at76c50x-usb.c | 4 +- drivers/net/wireless/ath/ar5523/ar5523.c | 14 ++--- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 4 +- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- drivers/net/wireless/ath/ath9k/calib.c | 2 +- drivers/net/wireless/ath/ath9k/common.c | 5 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 16 +++--- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 8 +-- drivers/net/wireless/ath/ath9k/hw.c | 5 +- drivers/net/wireless/ath/ath9k/link.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 10 ++-- drivers/net/wireless/ath/ath9k/rc.c | 4 +- drivers/net/wireless/ath/ath9k/recv.c | 6 +-- drivers/net/wireless/ath/carl9170/debug.c | 5 +- drivers/net/wireless/ath/carl9170/mac.c | 8 +-- drivers/net/wireless/ath/carl9170/main.c | 9 ++-- drivers/net/wireless/ath/carl9170/phy.c | 4 +- drivers/net/wireless/b43/b43.h | 2 +- drivers/net/wireless/b43/main.c | 8 +-- drivers/net/wireless/b43/phy_ht.c | 5 +- drivers/net/wireless/b43/phy_lcn.c | 5 +- drivers/net/wireless/b43/phy_n.c | 5 +- drivers/net/wireless/b43legacy/main.c | 9 ++-- drivers/net/wireless/brcm80211/brcmsmac/channel.c | 4 +- .../net/wireless/brcm80211/brcmsmac/mac80211_if.c | 6 +-- drivers/net/wireless/brcm80211/brcmsmac/main.c | 4 +- drivers/net/wireless/iwlegacy/3945-rs.c | 2 +- drivers/net/wireless/iwlegacy/4965-rs.c | 2 +- drivers/net/wireless/iwlegacy/common.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rs.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rxon.c | 9 ++-- drivers/net/wireless/libertas_tf/main.c | 8 +-- drivers/net/wireless/mac80211_hwsim.c | 42 +++++++++------ drivers/net/wireless/mwl8k.c | 36 +++++++------ drivers/net/wireless/p54/fwio.c | 4 +- drivers/net/wireless/p54/main.c | 4 +- drivers/net/wireless/p54/txrx.c | 4 +- drivers/net/wireless/rt2x00/rt2800lib.c | 8 +-- drivers/net/wireless/rt2x00/rt2x00config.c | 10 ++-- drivers/net/wireless/rt2x00/rt61pci.c | 2 +- drivers/net/wireless/rt2x00/rt73usb.c | 2 +- drivers/net/wireless/rtl818x/rtl8180/dev.c | 4 +- drivers/net/wireless/rtl818x/rtl8180/grf5101.c | 3 +- drivers/net/wireless/rtl818x/rtl8180/max2820.c | 2 +- drivers/net/wireless/rtl818x/rtl8180/rtl8225.c | 3 +- drivers/net/wireless/rtl818x/rtl8180/sa2400.c | 3 +- drivers/net/wireless/rtl818x/rtl8187/dev.c | 4 +- drivers/net/wireless/rtl818x/rtl8187/rtl8225.c | 3 +- 
drivers/net/wireless/rtlwifi/base.c | 4 +- drivers/net/wireless/rtlwifi/core.c | 6 +-- drivers/net/wireless/rtlwifi/rtl8192ce/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8192cu/trx.c | 8 +-- drivers/net/wireless/rtlwifi/rtl8192de/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8192se/trx.c | 4 +- drivers/net/wireless/rtlwifi/rtl8723ae/trx.c | 4 +- drivers/net/wireless/ti/wl1251/main.c | 5 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 4 +- include/net/mac80211.h | 18 +++---- net/mac80211/cfg.c | 7 +-- net/mac80211/chan.c | 11 ++-- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/main.c | 59 +++++++++++++--------- net/mac80211/mlme.c | 24 ++++++--- net/mac80211/scan.c | 6 +-- net/mac80211/trace.h | 37 +++++++------- net/mac80211/tx.c | 4 +- net/mac80211/util.c | 3 +- 70 files changed, 293 insertions(+), 244 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 3d339e04efb..f9a24e599de 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1293,7 +1293,8 @@ static int adm8211_config(struct ieee80211_hw *dev, u32 changed) { struct adm8211_priv *priv = dev->priv; struct ieee80211_conf *conf = &dev->conf; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (channel != priv->channel) { priv->channel = channel; diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 5ac5f7ae272..34c8a33cac0 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1943,12 +1943,12 @@ static int at76_config(struct ieee80211_hw *hw, u32 changed) struct at76_priv *priv = hw->priv; at76_dbg(DBG_MAC80211, "%s(): channel %d", - __func__, hw->conf.channel->hw_value); + __func__, hw->conf.chandef.chan->hw_value); at76_dbg_dump(DBG_MAC80211, priv->bssid, ETH_ALEN, "bssid:"); mutex_lock(&priv->mtx); - priv->channel = hw->conf.channel->hw_value; + priv->channel = hw->conf.chandef.chan->hw_value; if (is_valid_ether_addr(priv->bssid)) at76_join(priv); diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index afd1e36d308..17d7fece35d 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -457,14 +457,14 @@ static int ar5523_set_chan(struct ar5523 *ar) memset(&reset, 0, sizeof(reset)); reset.flags |= cpu_to_be32(UATH_CHAN_2GHZ); reset.flags |= cpu_to_be32(UATH_CHAN_OFDM); - reset.freq = cpu_to_be32(conf->channel->center_freq); + reset.freq = cpu_to_be32(conf->chandef.chan->center_freq); reset.maxrdpower = cpu_to_be32(50); /* XXX */ reset.channelchange = cpu_to_be32(1); reset.keeprccontent = cpu_to_be32(0); ar5523_dbg(ar, "set chan flags 0x%x freq %d\n", be32_to_cpu(reset.flags), - conf->channel->center_freq); + conf->chandef.chan->center_freq); return ar5523_cmd_write(ar, WDCMSG_RESET, &reset, sizeof(reset), 0); } @@ -594,7 +594,7 @@ static void ar5523_data_rx_cb(struct urb *urb) rx_status = IEEE80211_SKB_RXCB(data->skb); memset(rx_status, 0, sizeof(*rx_status)); rx_status->freq = be32_to_cpu(desc->channel); - rx_status->band = hw->conf.channel->band; + rx_status->band = hw->conf.chandef.chan->band; rx_status->signal = -95 + be32_to_cpu(desc->rssi); ieee80211_rx_irqsafe(hw, data->skb); @@ -1153,13 +1153,13 @@ static int ar5523_get_wlan_mode(struct ar5523 *ar, struct ieee80211_sta *sta; u32 sta_rate_set; - band = 
ar->hw->wiphy->bands[ar->hw->conf.channel->band]; + band = ar->hw->wiphy->bands[ar->hw->conf.chandef.chan->band]; sta = ieee80211_find_sta(ar->vif, bss_conf->bssid); if (!sta) { ar5523_info(ar, "STA not found!\n"); return WLAN_MODE_11b; } - sta_rate_set = sta->supp_rates[ar->hw->conf.channel->band]; + sta_rate_set = sta->supp_rates[ar->hw->conf.chandef.chan->band]; for (bit = 0; bit < band->n_bitrates; bit++) { if (sta_rate_set & 1) { @@ -1197,11 +1197,11 @@ static void ar5523_create_rateset(struct ar5523 *ar, ar5523_info(ar, "STA not found. Cannot set rates\n"); sta_rate_set = bss_conf->basic_rates; } else - sta_rate_set = sta->supp_rates[ar->hw->conf.channel->band]; + sta_rate_set = sta->supp_rates[ar->hw->conf.chandef.chan->band]; ar5523_dbg(ar, "sta rate_set = %08x\n", sta_rate_set); - band = ar->hw->wiphy->bands[ar->hw->conf.channel->band]; + band = ar->hw->wiphy->bands[ar->hw->conf.chandef.chan->band]; for (bit = 0; bit < band->n_bitrates; bit++) { BUG_ON(i >= AR5523_MAX_NRATES); ar5523_dbg(ar, "Considering rate %d : %d\n", diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 1d264c0f5a9..9b20d9ee271 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2639,7 +2639,7 @@ int ath5k_start(struct ieee80211_hw *hw) * be followed by initialization of the appropriate bits * and then setup of the interrupt mask. */ - ah->curchan = ah->hw->conf.channel; + ah->curchan = ah->hw->conf.chandef.chan; ah->imask = AR5K_INT_RXOK | AR5K_INT_RXERR | AR5K_INT_RXEOL diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 4264341533e..06f86f43571 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -202,7 +202,7 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed) mutex_lock(&ah->lock); if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - ret = ath5k_chan_set(ah, conf->channel); + ret = ath5k_chan_set(ah, conf->chandef.chan); if (ret < 0) goto unlock; } @@ -678,7 +678,7 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) memcpy(survey, &ah->survey, sizeof(*survey)); - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->noise = ah->ah_noise_floor; survey->filled = SURVEY_INFO_NOISE_DBM | SURVEY_INFO_CHANNEL_TIME | diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 5f05c26d1ec..2ff570f7f8f 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -79,7 +79,7 @@ static void ath9k_beacon_setup(struct ath_softc *sc, struct ieee80211_vif *vif, u8 chainmask = ah->txchainmask; u8 rate = 0; - sband = &sc->sbands[common->hw->conf.channel->band]; + sband = &sc->sbands[common->hw->conf.chandef.chan->band]; rate = sband->bitrates[rateidx].hw_value; if (vif->bss_conf.use_short_preamble) rate |= sband->bitrates[rateidx].hw_value_short; diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 1e8508530e9..b184f1ff0d3 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -208,7 +208,7 @@ bool ath9k_hw_reset_calvalid(struct ath_hw *ah) return true; ath_dbg(common, CALIBRATE, "Resetting Cal %d state for channel %u\n", - currCal->calData->calType, conf->channel->center_freq); + currCal->calData->calType, conf->chandef.chan->center_freq); ah->caldata->CalValid &= 
~currCal->calData->calType; currCal->calState = CAL_WAITING; diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index 905f1b31396..6c78fe7ca54 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -133,13 +133,14 @@ EXPORT_SYMBOL(ath9k_cmn_update_ichannel); struct ath9k_channel *ath9k_cmn_get_curchannel(struct ieee80211_hw *hw, struct ath_hw *ah) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *channel; u8 chan_idx; chan_idx = curchan->hw_value; channel = &ah->channels[chan_idx]; - ath9k_cmn_update_ichannel(channel, curchan, hw->conf.channel_type); + ath9k_cmn_update_ichannel(channel, curchan, + cfg80211_get_chandef_type(&hw->conf.chandef)); return channel; } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a8016d70088..098e3545e51 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -190,7 +190,7 @@ void ath9k_htc_reset(struct ath9k_htc_priv *priv) { struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *channel = priv->hw->conf.channel; + struct ieee80211_channel *channel = priv->hw->conf.chandef.chan; struct ath9k_hw_cal_data *caldata = NULL; enum htc_phymode mode; __be16 htc_mode; @@ -250,7 +250,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv, struct ath_common *common = ath9k_hw_common(ah); struct ieee80211_conf *conf = &common->hw->conf; bool fastcc; - struct ieee80211_channel *channel = hw->conf.channel; + struct ieee80211_channel *channel = hw->conf.chandef.chan; struct ath9k_hw_cal_data *caldata = NULL; enum htc_phymode mode; __be16 htc_mode; @@ -602,7 +602,7 @@ static void ath9k_htc_setup_rate(struct ath9k_htc_priv *priv, u32 caps = 0; int i, j; - sband = priv->hw->wiphy->bands[priv->hw->conf.channel->band]; + sband = priv->hw->wiphy->bands[priv->hw->conf.chandef.chan->band]; for (i = 0, j = 0; i < sband->n_bitrates; i++) { if (sta->supp_rates[sband->band] & BIT(i)) { @@ -904,7 +904,7 @@ static int ath9k_htc_start(struct ieee80211_hw *hw) struct ath9k_htc_priv *priv = hw->priv; struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *init_channel; int ret = 0; enum htc_phymode mode; @@ -1193,15 +1193,17 @@ static int ath9k_htc_config(struct ieee80211_hw *hw, u32 changed) } if ((changed & IEEE80211_CONF_CHANGE_CHANNEL) || chip_reset) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&hw->conf.chandef); int pos = curchan->hw_value; ath_dbg(common, CONFIG, "Set channel: %d MHz\n", curchan->center_freq); ath9k_cmn_update_ichannel(&priv->ah->channels[pos], - hw->conf.channel, - hw->conf.channel_type); + hw->conf.chandef.chan, + channel_type); if (ath9k_htc_set_channel(priv, hw, &priv->ah->channels[pos]) < 0) { ath_err(common, "Unable to set channel\n"); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 3ad1fd05c5e..306c55019e7 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -490,7 +490,7 @@ 
static void ath9k_htc_tx_process(struct ath9k_htc_priv *priv, if (txs->ts_flags & ATH9K_HTC_TXSTAT_SGI) rate->flags |= IEEE80211_TX_RC_SHORT_GI; } else { - if (cur_conf->channel->band == IEEE80211_BAND_5GHZ) + if (cur_conf->chandef.chan->band == IEEE80211_BAND_5GHZ) rate->idx += 4; /* No CCK rates */ } @@ -939,7 +939,7 @@ static void ath9k_process_rate(struct ieee80211_hw *hw, return; } - band = hw->conf.channel->band; + band = hw->conf.chandef.chan->band; sband = hw->wiphy->bands[band]; for (i = 0; i < sband->n_bitrates; i++) { @@ -1078,8 +1078,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, priv->ah->stats.avgbrssi = rxbuf->rxstatus.rs_rssi; rx_status->mactime = be64_to_cpu(rxbuf->rxstatus.rs_tstamp); - rx_status->band = hw->conf.channel->band; - rx_status->freq = hw->conf.channel->center_freq; + rx_status->band = hw->conf.chandef.chan->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; rx_status->signal = rxbuf->rxstatus.rs_rssi + ATH_DEFAULT_NOISE_FLOOR; rx_status->antenna = rxbuf->rxstatus.rs_antenna; rx_status->flag |= RX_FLAG_MACTIME_END; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 2a2ae403e0e..d5e6a38fe74 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -139,7 +139,7 @@ static void ath9k_hw_set_clockrate(struct ath_hw *ah) clockrate = 117; else if (!ah->curchan) /* should really check for CCK instead */ clockrate = ATH9K_CLOCK_RATE_CCK; - else if (conf->channel->band == IEEE80211_BAND_2GHZ) + else if (conf->chandef.chan->band == IEEE80211_BAND_2GHZ) clockrate = ATH9K_CLOCK_RATE_2GHZ_OFDM; else if (ah->caps.hw_caps & ATH9K_HW_CAP_FASTCLOCK) clockrate = ATH9K_CLOCK_FAST_RATE_5GHZ_OFDM; @@ -1110,7 +1110,8 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah) * BA frames in some implementations, but it has been found to fix ACK * timeout issues in other cases as well. 
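The conversion in these driver hunks is mechanical; condensed into one hypothetical non-chanctx driver's config callback, the new accessors look like this:

static int foo_op_config(struct ieee80211_hw *hw, u32 changed)
{
	struct ieee80211_conf *conf = &hw->conf;

	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
		struct ieee80211_channel *chan = conf->chandef.chan;
		enum nl80211_channel_type ct =
			cfg80211_get_chandef_type(&conf->chandef);

		/* previously conf->channel and conf->channel_type */
		foo_switch_channel(hw->priv, chan->hw_value, ct);
	}
	return 0;
}

foo_switch_channel() is a stand-in for whatever channel-programming routine the driver already has.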
*/ - if (conf->channel && conf->channel->band == IEEE80211_BAND_2GHZ && + if (conf->chandef.chan && + conf->chandef.chan->band == IEEE80211_BAND_2GHZ && !IS_CHAN_HALF_RATE(chan) && !IS_CHAN_QUARTER_RATE(chan)) { acktimeout += 64 - sifstime - ah->slottime; ctstimeout += 48 - sifstime - ah->slottime; diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index ade3afb21f9..b1433f561cd 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -213,7 +213,7 @@ static bool ath_paprd_send_frame(struct ath_softc *sc, struct sk_buff *skb, int txctl.txq = sc->tx.txq_map[IEEE80211_AC_BE]; memset(tx_info, 0, sizeof(*tx_info)); - tx_info->band = hw->conf.channel->band; + tx_info->band = hw->conf.chandef.chan->band; tx_info->flags |= IEEE80211_TX_CTL_NO_ACK; tx_info->control.rates[0].idx = 0; tx_info->control.rates[0].count = 1; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 24650fd4169..f984a03f912 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -585,7 +585,7 @@ static int ath9k_start(struct ieee80211_hw *hw) struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; struct ath9k_channel *init_channel; int r; @@ -1184,7 +1184,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) } if ((changed & IEEE80211_CONF_CHANGE_CHANNEL) || reset_channel) { - struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_channel *curchan = hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); int pos = curchan->hw_value; int old_pos = -1; unsigned long flags; @@ -1193,7 +1195,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) old_pos = ah->curchan - &ah->channels[0]; ath_dbg(common, CONFIG, "Set channel: %d MHz type: %d\n", - curchan->center_freq, conf->channel_type); + curchan->center_freq, channel_type); /* update survey stats for the old channel before switching */ spin_lock_irqsave(&common->cc_lock, flags); @@ -1208,7 +1210,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) ath9k_hw_getnf(ah, ah->curchan); ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos], - curchan, conf->channel_type); + curchan, channel_type); /* * If the operating channel changes, change the survey in-use flags diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 96ac433ba7f..aa4d368d8d3 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -814,7 +814,7 @@ static void ath_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, * So, set fourth rate in series to be same as third one for * above conditions. 
*/ - if ((sc->hw->conf.channel->band == IEEE80211_BAND_2GHZ) && + if ((sc->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) && (conf_is_ht(&sc->hw->conf))) { u8 dot11rate = rate_table->info[rix].dot11rate; u8 phy = rate_table->info[rix].phy; @@ -1328,7 +1328,7 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, ath_dbg(ath9k_hw_common(sc->sc_ah), CONFIG, "Operating HT Bandwidth changed to: %d\n", - sc->hw->conf.channel_type); + cfg80211_get_chandef_type(&sc->hw->conf.chandef)); } } diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index ee156e54314..c90ca57e372 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -859,7 +859,7 @@ static int ath9k_process_rate(struct ath_common *common, unsigned int i = 0; struct ath_softc __maybe_unused *sc = common->priv; - band = hw->conf.channel->band; + band = hw->conf.chandef.chan->band; sband = hw->wiphy->bands[band]; if (rx_stats->rs_rate & 0x80) { @@ -954,8 +954,8 @@ static int ath9k_rx_skb_preprocess(struct ath_common *common, if (ath9k_process_rate(common, hw, rx_stats, rx_status)) return -EINVAL; - rx_status->band = hw->conf.channel->band; - rx_status->freq = hw->conf.channel->center_freq; + rx_status->band = hw->conf.chandef.chan->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; rx_status->signal = ah->noise + rx_stats->rs_rssi; rx_status->antenna = rx_stats->rs_antenna; rx_status->flag |= RX_FLAG_MACTIME_END; diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c index 93fe6003a49..7741fe8e081 100644 --- a/drivers/net/wireless/ath/carl9170/debug.c +++ b/drivers/net/wireless/ath/carl9170/debug.c @@ -654,8 +654,9 @@ static ssize_t carl9170_debugfs_bug_write(struct ar9170 *ar, const char *buf, goto out; case 'P': - err = carl9170_set_channel(ar, ar->hw->conf.channel, - ar->hw->conf.channel_type, CARL9170_RFI_COLD); + err = carl9170_set_channel(ar, ar->hw->conf.chandef.chan, + cfg80211_get_chandef_type(&ar->hw->conf.chandef), + CARL9170_RFI_COLD); if (err < 0) count = err; diff --git a/drivers/net/wireless/ath/carl9170/mac.c b/drivers/net/wireless/ath/carl9170/mac.c index 24d75ab94f0..a2f005703c0 100644 --- a/drivers/net/wireless/ath/carl9170/mac.c +++ b/drivers/net/wireless/ath/carl9170/mac.c @@ -48,7 +48,7 @@ int carl9170_set_dyn_sifs_ack(struct ar9170 *ar) if (conf_is_ht40(&ar->hw->conf)) val = 0x010a; else { - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) val = 0x105; else val = 0x104; @@ -66,7 +66,7 @@ int carl9170_set_rts_cts_rate(struct ar9170 *ar) rts_rate = 0x1da; cts_rate = 0x10a; } else { - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { /* 11 mbit CCK */ rts_rate = 033; cts_rate = 003; @@ -93,7 +93,7 @@ int carl9170_set_slot_time(struct ar9170 *ar) return 0; } - if ((ar->hw->conf.channel->band == IEEE80211_BAND_5GHZ) || + if ((ar->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) || vif->bss_conf.use_short_slot) slottime = 9; @@ -120,7 +120,7 @@ int carl9170_set_mac_rates(struct ar9170 *ar) basic |= (vif->bss_conf.basic_rates & 0xff0) << 4; rcu_read_unlock(); - if (ar->hw->conf.channel->band == IEEE80211_BAND_5GHZ) + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) mandatory = 0xff00; /* OFDM 6/9/12/18/24/36/48/54 */ else mandatory = 0xff0f; /* OFDM (6/9../54) + CCK (1/2/5.5/11) */ diff --git 
a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 08b19319994..4e268b1360d 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -929,6 +929,9 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&hw->conf.chandef); + /* adjust slot time for 5 GHz */ err = carl9170_set_slot_time(ar); if (err) @@ -938,8 +941,8 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) if (err) goto out; - err = carl9170_set_channel(ar, hw->conf.channel, - hw->conf.channel_type, CARL9170_RFI_NONE); + err = carl9170_set_channel(ar, hw->conf.chandef.chan, + channel_type, CARL9170_RFI_NONE); if (err) goto out; @@ -957,7 +960,7 @@ static int carl9170_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_POWER) { - err = carl9170_set_mac_tpc(ar, ar->hw->conf.channel); + err = carl9170_set_mac_tpc(ar, ar->hw->conf.chandef.chan); if (err) goto out; } diff --git a/drivers/net/wireless/ath/carl9170/phy.c b/drivers/net/wireless/ath/carl9170/phy.c index b72c09cf43a..c5f1fdd1eae 100644 --- a/drivers/net/wireless/ath/carl9170/phy.c +++ b/drivers/net/wireless/ath/carl9170/phy.c @@ -1331,7 +1331,7 @@ static void carl9170_calc_ctl(struct ar9170 *ar, u32 freq, enum carl9170_bw bw) * CTL_ETSI for 2GHz and CTL_FCC for 5GHz. */ ctl_grp = ath_regd_get_band_ctl(&ar->common.regulatory, - ar->hw->conf.channel->band); + ar->hw->conf.chandef.chan->band); /* ctl group not found - either invalid band (NO_CTL) or ww roaming */ if (ctl_grp == NO_CTL || ctl_grp == SD_NO_CTL) @@ -1341,7 +1341,7 @@ static void carl9170_calc_ctl(struct ar9170 *ar, u32 freq, enum carl9170_bw bw) /* skip CTL and heavy clip for CTL_MKK and CTL_ETSI */ return; - if (ar->hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (ar->hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { modes = mode_list_2ghz; nr_modes = ARRAY_SIZE(mode_list_2ghz); } else { diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 10e288d470e..6a4bd8c433b 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -972,7 +972,7 @@ static inline int b43_is_mode(struct b43_wl *wl, int type) */ static inline enum ieee80211_band b43_current_band(struct b43_wl *wl) { - return wl->hw->conf.channel->band; + return wl->hw->conf.chandef.chan->band; } static inline int b43_bus_may_powerdown(struct b43_wldev *wldev) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 05682736e46..d135e8975f5 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3848,7 +3848,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) dev = wl->current_dev; /* Switch the band (if necessary). This might change the active core. */ - err = b43_switch_band(wl, conf->channel); + err = b43_switch_band(wl, conf->chandef.chan); if (err) goto out_unlock_mutex; @@ -3878,8 +3878,8 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) /* Switch to the requested channel. * The firmware takes care of races with the TX handler. 
*/ - if (conf->channel->hw_value != phy->channel) - b43_switch_channel(dev, conf->channel->hw_value); + if (conf->chandef.chan->hw_value != phy->channel) + b43_switch_channel(dev, conf->chandef.chan->hw_value); dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); @@ -5002,7 +5002,7 @@ static int b43_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = dev->stats.link_noise; diff --git a/drivers/net/wireless/b43/phy_ht.c b/drivers/net/wireless/b43/phy_ht.c index 7416c5e9154..016682ea744 100644 --- a/drivers/net/wireless/b43/phy_ht.c +++ b/drivers/net/wireless/b43/phy_ht.c @@ -525,8 +525,9 @@ static void b43_phy_ht_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_phy_ht_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43/phy_lcn.c b/drivers/net/wireless/b43/phy_lcn.c index a13e28ef624..0bafa3b1703 100644 --- a/drivers/net/wireless/b43/phy_lcn.c +++ b/drivers/net/wireless/b43/phy_lcn.c @@ -808,8 +808,9 @@ static void b43_phy_lcn_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_phy_lcn_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 3c35382ee6c..949a3bdeede 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -5530,8 +5530,9 @@ static void b43_nphy_op_switch_analog(struct b43_wldev *dev, bool on) static int b43_nphy_op_switch_channel(struct b43_wldev *dev, unsigned int new_channel) { - struct ieee80211_channel *channel = dev->wl->hw->conf.channel; - enum nl80211_channel_type channel_type = dev->wl->hw->conf.channel_type; + struct ieee80211_channel *channel = dev->wl->hw->conf.chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&dev->wl->hw->conf.chandef); if (b43_current_band(dev->wl) == IEEE80211_BAND_2GHZ) { if ((new_channel < 1) || (new_channel > 14)) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 8c3f70e1a01..57266882186 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2720,7 +2720,7 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, goto out_unlock_mutex; /* Switch the PHY mode (if necessary). 
*/ - switch (conf->channel->band) { + switch (conf->chandef.chan->band) { case IEEE80211_BAND_2GHZ: if (phy->type == B43legacy_PHYTYPE_B) new_phymode = B43legacy_PHYMODE_B; @@ -2748,8 +2748,9 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, /* Switch to the requested channel. * The firmware takes care of races with the TX handler. */ - if (conf->channel->hw_value != phy->channel) - b43legacy_radio_selectchannel(dev, conf->channel->hw_value, 0); + if (conf->chandef.chan->hw_value != phy->channel) + b43legacy_radio_selectchannel(dev, conf->chandef.chan->hw_value, + 0); dev->wl->radiotap_enabled = !!(conf->flags & IEEE80211_CONF_MONITOR); @@ -3558,7 +3559,7 @@ static int b43legacy_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = dev->stats.link_noise; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/brcm80211/brcmsmac/channel.c index 10ee314c422..cc87926f505 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/channel.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/channel.c @@ -379,7 +379,7 @@ brcms_c_channel_set_chanspec(struct brcms_cm_info *wlc_cm, u16 chanspec, u8 local_constraint_qdbm) { struct brcms_c_info *wlc = wlc_cm->wlc; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; struct txpwr_limits txpwr; brcms_c_channel_reg_limits(wlc_cm, chanspec, &txpwr); @@ -404,7 +404,7 @@ brcms_c_channel_reg_limits(struct brcms_cm_info *wlc_cm, u16 chanspec, struct txpwr_limits *txpwr) { struct brcms_c_info *wlc = wlc_cm->wlc; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; uint i; uint chan; int maxpwr; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index aa5f43fee5e..70731d23ddb 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -414,10 +414,10 @@ static int brcms_ops_config(struct ieee80211_hw *hw, u32 changed) new_int); } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - if (conf->channel_type == NL80211_CHAN_HT20 || - conf->channel_type == NL80211_CHAN_NO_HT) + if (conf->chandef.width == NL80211_CHAN_WIDTH_20 || + conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) err = brcms_c_set_channel(wl->wlc, - conf->channel->hw_value); + conf->chandef.chan->hw_value); else err = -ENOTSUPP; } diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index 8ef02dca8f8..e0dc1838cd1 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -5099,7 +5099,7 @@ int brcms_c_up(struct brcms_c_info *wlc) wlc->pub->up = true; if (wlc->bandinit_pending) { - ch = wlc->pub->ieee_hw->conf.channel; + ch = wlc->pub->ieee_hw->conf.chandef.chan; brcms_c_suspend_mac_and_wait(wlc); brcms_c_set_chanspec(wlc, ch20mhz_chspec(ch->hw_value)); wlc->bandinit_pending = false; @@ -7748,7 +7748,7 @@ bool brcms_c_dpc(struct brcms_c_info *wlc, bool bounded) void brcms_c_init(struct brcms_c_info *wlc, bool mute_tx) { struct bcma_device *core = wlc->hw->d11core; - struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.channel; + struct ieee80211_channel *ch = wlc->pub->ieee_hw->conf.chandef.chan; u16 
chanspec; brcms_dbg_info(core, "wl%d\n", wlc->pub->unit); diff --git a/drivers/net/wireless/iwlegacy/3945-rs.c b/drivers/net/wireless/iwlegacy/3945-rs.c index d4fd29ad90d..c9f197d9ca1 100644 --- a/drivers/net/wireless/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/iwlegacy/3945-rs.c @@ -347,7 +347,7 @@ il3945_rs_rate_init(struct il_priv *il, struct ieee80211_sta *sta, u8 sta_id) psta = (struct il3945_sta_priv *)sta->drv_priv; rs_sta = &psta->rs_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; rs_sta->il = il; diff --git a/drivers/net/wireless/iwlegacy/4965-rs.c b/drivers/net/wireless/iwlegacy/4965-rs.c index e8324b5e5bf..1d92a59a85a 100644 --- a/drivers/net/wireless/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/iwlegacy/4965-rs.c @@ -2299,7 +2299,7 @@ il4965_rs_rate_init(struct il_priv *il, struct ieee80211_sta *sta, u8 sta_id) sta_priv = (struct il_station_priv *)sta->drv_priv; lq_sta = &sta_priv->lq_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; lq_sta->lq.sta_id = sta_id; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index 722bfb57cfd..025d8b0eeaf 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4974,7 +4974,7 @@ il_mac_config(struct ieee80211_hw *hw, u32 changed) struct il_priv *il = hw->priv; const struct il_channel_info *ch_info; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; struct il_ht_config *ht_conf = &il->current_ht_config; unsigned long flags = 0; int ret = 0; diff --git a/drivers/net/wireless/iwlwifi/dvm/rs.c b/drivers/net/wireless/iwlwifi/dvm/rs.c index abe30426726..907bd6e50aa 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/iwlwifi/dvm/rs.c @@ -2831,7 +2831,7 @@ void iwl_rs_rate_init(struct iwl_priv *priv, struct ieee80211_sta *sta, u8 sta_i sta_priv = (struct iwl_station_priv *) sta->drv_priv; lq_sta = &sta_priv->lq_sta; - sband = hw->wiphy->bands[conf->channel->band]; + sband = hw->wiphy->bands[conf->chandef.chan->band]; lq_sta->lq.sta_id = sta_id; diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 23be948cf16..085c589e714 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -78,8 +78,9 @@ void iwl_connection_init_rx_config(struct iwl_priv *priv, ctx->staging.flags |= RXON_FLG_SHORT_PREAMBLE_MSK; #endif - ctx->staging.channel = cpu_to_le16(priv->hw->conf.channel->hw_value); - priv->band = priv->hw->conf.channel->band; + ctx->staging.channel = + cpu_to_le16(priv->hw->conf.chandef.chan->hw_value); + priv->band = priv->hw->conf.chandef.chan->band; iwl_set_flags_for_band(priv, ctx, priv->band, ctx->vif); @@ -951,7 +952,7 @@ static void iwl_calc_basic_rates(struct iwl_priv *priv, unsigned long basic = ctx->vif->bss_conf.basic_rates; int i; - sband = priv->hw->wiphy->bands[priv->hw->conf.channel->band]; + sband = priv->hw->wiphy->bands[priv->hw->conf.chandef.chan->band]; for_each_set_bit(i, &basic, BITS_PER_LONG) { int hw = sband->bitrates[i].hw_value; @@ -1181,7 +1182,7 @@ int iwlagn_mac_config(struct ieee80211_hw *hw, u32 changed) struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct iwl_rxon_context *ctx; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel 
*channel = conf->chandef.chan; int ret = 0; IWL_DEBUG_MAC80211(priv, "enter: changed %#x\n", changed); diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 7001856241e..088de9d25c3 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -412,9 +412,9 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; lbtf_deb_enter(LBTF_DEB_MACOPS); - if (conf->channel->center_freq != priv->cur_freq) { - priv->cur_freq = conf->channel->center_freq; - lbtf_set_channel(priv, conf->channel->hw_value); + if (conf->chandef.chan->center_freq != priv->cur_freq) { + priv->cur_freq = conf->chandef.chan->center_freq; + lbtf_set_channel(priv, conf->chandef.chan->hw_value); } lbtf_deb_leave(LBTF_DEB_MACOPS); return 0; @@ -537,7 +537,7 @@ static int lbtf_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = priv->noise; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0064d38276b..4ac54861e91 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1062,11 +1062,13 @@ out: return HRTIMER_NORESTART; } -static const char *hwsim_chantypes[] = { - [NL80211_CHAN_NO_HT] = "noht", - [NL80211_CHAN_HT20] = "ht20", - [NL80211_CHAN_HT40MINUS] = "ht40-", - [NL80211_CHAN_HT40PLUS] = "ht40+", +static const char * const hwsim_chanwidths[] = { + [NL80211_CHAN_WIDTH_20_NOHT] = "noht", + [NL80211_CHAN_WIDTH_20] = "ht20", + [NL80211_CHAN_WIDTH_40] = "ht40", + [NL80211_CHAN_WIDTH_80] = "vht80", + [NL80211_CHAN_WIDTH_80P80] = "vht80p80", + [NL80211_CHAN_WIDTH_160] = "vht160", }; static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) @@ -1080,18 +1082,28 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) [IEEE80211_SMPS_DYNAMIC] = "dynamic", }; - wiphy_debug(hw->wiphy, - "%s (freq=%d/%s idle=%d ps=%d smps=%s)\n", - __func__, - conf->channel ? conf->channel->center_freq : 0, - hwsim_chantypes[conf->channel_type], - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + if (conf->chandef.chan) + wiphy_debug(hw->wiphy, + "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", + __func__, + conf->chandef.chan->center_freq, + conf->chandef.center_freq1, + conf->chandef.center_freq2, + hwsim_chanwidths[conf->chandef.width], + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); + else + wiphy_debug(hw->wiphy, + "%s (freq=0 idle=%d ps=%d smps=%s)\n", + __func__, + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); - data->channel = conf->channel; + data->channel = conf->chandef.chan; WARN_ON(data->channel && channels > 1); @@ -1277,7 +1289,7 @@ static int mac80211_hwsim_get_survey( return -ENOENT; /* Current channel */ - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; /* * Magically conjured noise level --- this is only ok for simulated hardware. 
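All of the driver hunks in this patch apply the same mechanical conversion: reads of hw->conf.channel become hw->conf.chandef.chan, and reads of hw->conf.channel_type become cfg80211_get_chandef_type(&hw->conf.chandef) or a check on conf->chandef.width. A minimal sketch of a converted callback is shown below; foo_op_config(), foo_tune() and foo_set_ht() are hypothetical names invented for illustration, and only the mac80211/cfg80211 accessors are taken from the hunks themselves.

#include <net/mac80211.h>

/* Hypothetical hardware helpers, stand-ins for a real driver's routines. */
static void foo_tune(struct ieee80211_hw *hw, enum ieee80211_band band,
		     u16 center_freq, u16 hw_value)
{
}

static void foo_set_ht(struct ieee80211_hw *hw, enum nl80211_channel_type type)
{
}

static int foo_op_config(struct ieee80211_hw *hw, u32 changed)
{
	struct ieee80211_conf *conf = &hw->conf;

	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
		/* The control channel now lives in conf->chandef.chan. */
		struct ieee80211_channel *chan = conf->chandef.chan;
		/* The legacy HT20/HT40+/- type is derived from the chandef. */
		enum nl80211_channel_type ht_type =
			cfg80211_get_chandef_type(&conf->chandef);

		foo_tune(hw, chan->band, chan->center_freq, chan->hw_value);
		foo_set_ht(hw, ht_type);
	}
	return 0;
}

The conf_is_ht20()/conf_is_ht40() helpers in include/net/mac80211.h, updated further down in this patch, express the same information in terms of chandef.width and center_freq1, which is why the brcmsmac hunk above switches to NL80211_CHAN_WIDTH_* checks while other drivers keep the legacy type via cfg80211_get_chandef_type().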
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 091d9a64080..9f9a1449e81 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2837,7 +2837,9 @@ static int mwl8k_cmd_tx_power(struct ieee80211_hw *hw, struct ieee80211_conf *conf, unsigned short pwr) { - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); struct mwl8k_cmd_tx_power *cmd; int rc; int i; @@ -2857,14 +2859,14 @@ static int mwl8k_cmd_tx_power(struct ieee80211_hw *hw, cmd->channel = cpu_to_le16(channel->hw_value); - if (conf->channel_type == NL80211_CHAN_NO_HT || - conf->channel_type == NL80211_CHAN_HT20) { + if (channel_type == NL80211_CHAN_NO_HT || + channel_type == NL80211_CHAN_HT20) { cmd->bw = cpu_to_le16(0x2); } else { cmd->bw = cpu_to_le16(0x4); - if (conf->channel_type == NL80211_CHAN_HT40MINUS) + if (channel_type == NL80211_CHAN_HT40MINUS) cmd->sub_ch = cpu_to_le16(0x3); - else if (conf->channel_type == NL80211_CHAN_HT40PLUS) + else if (channel_type == NL80211_CHAN_HT40PLUS) cmd->sub_ch = cpu_to_le16(0x1); } @@ -3008,7 +3010,9 @@ struct mwl8k_cmd_set_rf_channel { static int mwl8k_cmd_set_rf_channel(struct ieee80211_hw *hw, struct ieee80211_conf *conf) { - struct ieee80211_channel *channel = conf->channel; + struct ieee80211_channel *channel = conf->chandef.chan; + enum nl80211_channel_type channel_type = + cfg80211_get_chandef_type(&conf->chandef); struct mwl8k_cmd_set_rf_channel *cmd; int rc; @@ -3026,12 +3030,12 @@ static int mwl8k_cmd_set_rf_channel(struct ieee80211_hw *hw, else if (channel->band == IEEE80211_BAND_5GHZ) cmd->channel_flags |= cpu_to_le32(0x00000004); - if (conf->channel_type == NL80211_CHAN_NO_HT || - conf->channel_type == NL80211_CHAN_HT20) + if (channel_type == NL80211_CHAN_NO_HT || + channel_type == NL80211_CHAN_HT20) cmd->channel_flags |= cpu_to_le32(0x00000080); - else if (conf->channel_type == NL80211_CHAN_HT40MINUS) + else if (channel_type == NL80211_CHAN_HT40MINUS) cmd->channel_flags |= cpu_to_le32(0x000001900); - else if (conf->channel_type == NL80211_CHAN_HT40PLUS) + else if (channel_type == NL80211_CHAN_HT40PLUS) cmd->channel_flags |= cpu_to_le32(0x000000900); rc = mwl8k_post_cmd(hw, &cmd->header); @@ -3950,7 +3954,7 @@ static int mwl8k_cmd_set_new_stn_add(struct ieee80211_hw *hw, memcpy(cmd->mac_addr, sta->addr, ETH_ALEN); cmd->stn_id = cpu_to_le16(sta->aid); cmd->action = cpu_to_le16(MWL8K_STA_ACTION_ADD); - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rates = sta->supp_rates[IEEE80211_BAND_2GHZ]; else rates = sta->supp_rates[IEEE80211_BAND_5GHZ] << 5; @@ -4385,7 +4389,7 @@ static int mwl8k_cmd_update_stadb_add(struct ieee80211_hw *hw, p->ht_caps = cpu_to_le16(sta->ht_cap.cap); p->extended_ht_caps = (sta->ht_cap.ampdu_factor & 3) | ((sta->ht_cap.ampdu_density & 7) << 2); - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rates = sta->supp_rates[IEEE80211_BAND_2GHZ]; else rates = sta->supp_rates[IEEE80211_BAND_5GHZ] << 5; @@ -4868,7 +4872,7 @@ mwl8k_bss_info_changed_sta(struct ieee80211_hw *hw, struct ieee80211_vif *vif, goto out; } - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) { + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) { ap_legacy_rates = ap->supp_rates[IEEE80211_BAND_2GHZ]; } else { ap_legacy_rates = @@ -4900,7 +4904,7 @@ 
mwl8k_bss_info_changed_sta(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (idx) idx--; - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rate = mwl8k_rates_24[idx].hw_value; else rate = mwl8k_rates_50[idx].hw_value; @@ -4973,7 +4977,7 @@ mwl8k_bss_info_changed_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (idx) idx--; - if (hw->conf.channel->band == IEEE80211_BAND_2GHZ) + if (hw->conf.chandef.chan->band == IEEE80211_BAND_2GHZ) rate = mwl8k_rates_24[idx].hw_value; else rate = mwl8k_rates_50[idx].hw_value; @@ -5246,7 +5250,7 @@ static int mwl8k_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = priv->noise; diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c index 9ba85106eec..b3879fbf536 100644 --- a/drivers/net/wireless/p54/fwio.c +++ b/drivers/net/wireless/p54/fwio.c @@ -402,7 +402,7 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell) struct p54_rssi_db_entry *rssi_data; unsigned int i; void *entry; - __le16 freq = cpu_to_le16(priv->hw->conf.channel->center_freq); + __le16 freq = cpu_to_le16(priv->hw->conf.chandef.chan->center_freq); skb = p54_alloc_skb(priv, P54_HDR_FLAG_CONTROL_OPSET, sizeof(*head) + 2 + sizeof(*iq_autocal) + sizeof(*body) + @@ -532,7 +532,7 @@ int p54_scan(struct p54_common *priv, u16 mode, u16 dwell) err: wiphy_err(priv->hw->wiphy, "frequency change to channel %d failed.\n", ieee80211_frequency_to_channel( - priv->hw->conf.channel->center_freq)); + priv->hw->conf.chandef.chan->center_freq)); dev_kfree_skb_any(skb); return -EINVAL; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index ee654a691f3..067e6f2fd05 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -340,7 +340,7 @@ static int p54_config(struct ieee80211_hw *dev, u32 changed) * TODO: Use the LM_SCAN_TRAP to determine the current * operating channel. */ - priv->curchan = priv->hw->conf.channel; + priv->curchan = priv->hw->conf.chandef.chan; p54_reset_stats(priv); WARN_ON(p54_fetch_statistics(priv)); } @@ -480,7 +480,7 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev, p54_set_edcf(priv); } if (changed & BSS_CHANGED_BASIC_RATES) { - if (dev->conf.channel->band == IEEE80211_BAND_5GHZ) + if (dev->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) priv->basic_rate_mask = (info->basic_rates << 4); else priv->basic_rate_mask = info->basic_rates; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 12f0a34477f..f95de0d1621 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -354,13 +354,13 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb) rx_status->signal = p54_rssi_to_dbm(priv, hdr->rssi); if (hdr->rate & 0x10) rx_status->flag |= RX_FLAG_SHORTPRE; - if (priv->hw->conf.channel->band == IEEE80211_BAND_5GHZ) + if (priv->hw->conf.chandef.chan->band == IEEE80211_BAND_5GHZ) rx_status->rate_idx = (rate < 4) ? 
0 : rate - 4; else rx_status->rate_idx = rate; rx_status->freq = freq; - rx_status->band = priv->hw->conf.channel->band; + rx_status->band = priv->hw->conf.chandef.chan->band; rx_status->antenna = hdr->antenna; tsf32 = le32_to_cpu(hdr->tsf32); diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index a658b4bc7da..34456b45acb 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2763,7 +2763,7 @@ static void rt2800_config_txpower(struct rt2x00_dev *rt2x00dev, void rt2800_gain_calibration(struct rt2x00_dev *rt2x00dev) { - rt2800_config_txpower(rt2x00dev, rt2x00dev->hw->conf.channel, + rt2800_config_txpower(rt2x00dev, rt2x00dev->hw->conf.chandef.chan, rt2x00dev->tx_power); } EXPORT_SYMBOL_GPL(rt2800_gain_calibration); @@ -2898,11 +2898,11 @@ void rt2800_config(struct rt2x00_dev *rt2x00dev, if (flags & IEEE80211_CONF_CHANGE_CHANNEL) { rt2800_config_channel(rt2x00dev, libconf->conf, &libconf->rf, &libconf->channel); - rt2800_config_txpower(rt2x00dev, libconf->conf->channel, + rt2800_config_txpower(rt2x00dev, libconf->conf->chandef.chan, libconf->conf->power_level); } if (flags & IEEE80211_CONF_CHANGE_POWER) - rt2800_config_txpower(rt2x00dev, libconf->conf->channel, + rt2800_config_txpower(rt2x00dev, libconf->conf->chandef.chan, libconf->conf->power_level); if (flags & IEEE80211_CONF_CHANGE_RETRY_LIMITS) rt2800_config_retry_limit(rt2x00dev, libconf); @@ -5563,7 +5563,7 @@ int rt2800_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; rt2800_register_read(rt2x00dev, CH_IDLE_STA, &idle); rt2800_register_read(rt2x00dev, CH_BUSY_STA, &busy); diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index 49a63e97393..8cb43f8f3ef 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -184,7 +184,7 @@ static u16 rt2x00ht_center_channel(struct rt2x00_dev *rt2x00dev, /* * Initialize center channel to current channel. */ - center_channel = spec->channels[conf->channel->hw_value].channel; + center_channel = spec->channels[conf->chandef.chan->hw_value].channel; /* * Adjust center channel to HT40+ and HT40- operation. 
@@ -199,7 +199,7 @@ static u16 rt2x00ht_center_channel(struct rt2x00_dev *rt2x00dev, return i; WARN_ON(1); - return conf->channel->hw_value; + return conf->chandef.chan->hw_value; } void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, @@ -227,7 +227,7 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, hw_value = rt2x00ht_center_channel(rt2x00dev, conf); } else { clear_bit(CONFIG_CHANNEL_HT40, &rt2x00dev->flags); - hw_value = conf->channel->hw_value; + hw_value = conf->chandef.chan->hw_value; } memcpy(&libconf.rf, @@ -279,8 +279,8 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, else clear_bit(CONFIG_POWERSAVING, &rt2x00dev->flags); - rt2x00dev->curr_band = conf->channel->band; - rt2x00dev->curr_freq = conf->channel->center_freq; + rt2x00dev->curr_band = conf->chandef.chan->band; + rt2x00dev->curr_freq = conf->chandef.chan->center_freq; rt2x00dev->tx_power = conf->power_level; rt2x00dev->short_retry = conf->short_frame_max_tx_count; rt2x00dev->long_retry = conf->long_frame_max_tx_count; diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index f95792cfcf8..f85035cc836 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -847,7 +847,7 @@ static void rt61pci_config_lna_gain(struct rt2x00_dev *rt2x00dev, u16 eeprom; short lna_gain = 0; - if (libconf->conf->channel->band == IEEE80211_BAND_2GHZ) { + if (libconf->conf->chandef.chan->band == IEEE80211_BAND_2GHZ) { if (test_bit(CAPABILITY_EXTERNAL_LNA_BG, &rt2x00dev->cap_flags)) lna_gain += 14; diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 24eec66e9fd..a3387b146bb 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -739,7 +739,7 @@ static void rt73usb_config_lna_gain(struct rt2x00_dev *rt2x00dev, u16 eeprom; short lna_gain = 0; - if (libconf->conf->channel->band == IEEE80211_BAND_2GHZ) { + if (libconf->conf->chandef.chan->band == IEEE80211_BAND_2GHZ) { if (test_bit(CAPABILITY_EXTERNAL_LNA_BG, &rt2x00dev->cap_flags)) lna_gain += 14; diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c index 1b3c2843221..91a04e2b8ec 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c @@ -147,8 +147,8 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev) signal = priv->rf->calc_rssi(agc, sq); } rx_status.signal = signal; - rx_status.freq = dev->conf.channel->center_freq; - rx_status.band = dev->conf.channel->band; + rx_status.freq = dev->conf.chandef.chan->center_freq; + rx_status.band = dev->conf.chandef.chan->band; rx_status.mactime = le64_to_cpu(entry->tsft); rx_status.flag |= RX_FLAG_MACTIME_START; if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) diff --git a/drivers/net/wireless/rtl818x/rtl8180/grf5101.c b/drivers/net/wireless/rtl818x/rtl8180/grf5101.c index 5ee7589dd54..077ff92cc13 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/grf5101.c +++ b/drivers/net/wireless/rtl818x/rtl8180/grf5101.c @@ -82,7 +82,8 @@ static void grf5101_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); u32 txpw = priv->channels[channel - 1].hw_value & 0xFF; u32 chan = channel - 1; diff --git a/drivers/net/wireless/rtl818x/rtl8180/max2820.c 
b/drivers/net/wireless/rtl818x/rtl8180/max2820.c index 667b3363d43..4715000c94d 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/max2820.c +++ b/drivers/net/wireless/rtl818x/rtl8180/max2820.c @@ -95,7 +95,7 @@ static void max2820_rf_set_channel(struct ieee80211_hw *dev, { struct rtl8180_priv *priv = dev->priv; int channel = conf ? - ieee80211_frequency_to_channel(conf->channel->center_freq) : 1; + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq) : 1; unsigned int chan_idx = channel - 1; u32 txpw = priv->channels[chan_idx].hw_value & 0xFF; u32 chan = max2820_chan[chan_idx]; diff --git a/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c b/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c index 7c4574ba9d7..cc2a5412c1f 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c +++ b/drivers/net/wireless/rtl818x/rtl8180/rtl8225.c @@ -719,7 +719,8 @@ static void rtl8225_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int chan = ieee80211_frequency_to_channel(conf->channel->center_freq); + int chan = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (priv->rf->init == rtl8225_rf_init) rtl8225_rf_set_tx_power(dev, chan); diff --git a/drivers/net/wireless/rtl818x/rtl8180/sa2400.c b/drivers/net/wireless/rtl818x/rtl8180/sa2400.c index 44771a6286a..b3ec40f6bd2 100644 --- a/drivers/net/wireless/rtl818x/rtl8180/sa2400.c +++ b/drivers/net/wireless/rtl818x/rtl8180/sa2400.c @@ -105,7 +105,8 @@ static void sa2400_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8180_priv *priv = dev->priv; - int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + int channel = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); u32 txpw = priv->channels[channel - 1].hw_value & 0xFF; u32 chan = sa2400_chan[channel - 1]; diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c index 4574bd21370..f49220e234b 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c @@ -379,8 +379,8 @@ static void rtl8187_rx_cb(struct urb *urb) rate = (flags >> 20) & 0xF; skb_trim(skb, flags & 0x0FFF); rx_status.rate_idx = rate; - rx_status.freq = dev->conf.channel->center_freq; - rx_status.band = dev->conf.channel->band; + rx_status.freq = dev->conf.chandef.chan->center_freq; + rx_status.band = dev->conf.chandef.chan->band; rx_status.flag |= RX_FLAG_MACTIME_START; if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) rx_status.flag |= RX_FLAG_FAILED_FCS_CRC; diff --git a/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c index 908903f721f..f0bf35fedba 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c +++ b/drivers/net/wireless/rtl818x/rtl8187/rtl8225.c @@ -905,7 +905,8 @@ static void rtl8225_rf_set_channel(struct ieee80211_hw *dev, struct ieee80211_conf *conf) { struct rtl8187_priv *priv = dev->priv; - int chan = ieee80211_frequency_to_channel(conf->channel->center_freq); + int chan = + ieee80211_frequency_to_channel(conf->chandef.chan->center_freq); if (priv->rf->init == rtl8225_rf_init) rtl8225_rf_set_tx_power(dev, chan); diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 99c5cea3fe2..0e7866d1d0e 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -691,7 +691,7 @@ int rtlwifi_rate_mapping(struct ieee80211_hw *hw, int rate_idx; if (false == isht) { - if 
(IEEE80211_BAND_2GHZ == hw->conf.channel->band) { + if (IEEE80211_BAND_2GHZ == hw->conf.chandef.chan->band) { switch (desc_rate) { case DESC92_RATE1M: rate_idx = 0; @@ -1365,7 +1365,7 @@ int rtl_send_smps_action(struct ieee80211_hw *hw, rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); info->control.rates[0].idx = 0; - info->band = hw->conf.channel->band; + info->band = hw->conf.chandef.chan->band; rtlpriv->intf_ops->adapter_tx(hw, sta, skb, &tcb_desc); } err_free: diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index b5a7a260bf6..64a41ecf86c 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -320,7 +320,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { - struct ieee80211_channel *channel = hw->conf.channel; + struct ieee80211_channel *channel = hw->conf.chandef.chan; u8 wide_chan = (u8) channel->hw_value; /* @@ -332,7 +332,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) *info for cisco1253 bw20, so we modify *it here based on UPPER & LOWER */ - switch (hw->conf.channel_type) { + switch (cfg80211_get_chandef_type(&hw->conf.chandef)) { case NL80211_CHAN_HT20: case NL80211_CHAN_NO_HT: /* SC */ @@ -390,7 +390,7 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) rtlpriv->cfg->ops->switch_channel(hw); rtlpriv->cfg->ops->set_channel_access(hw); rtlpriv->cfg->ops->set_bw_mode(hw, - hw->conf.channel_type); + cfg80211_get_chandef_type(&hw->conf.chandef)); } mutex_unlock(&rtlpriv->locks.conf_mutex); diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c index b9b1a6e0b16..27e4ebd5109 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c @@ -544,8 +544,8 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw, stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index b6222eedb83..f0dada53015 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -324,8 +324,8 @@ bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, && (GET_RX_DESC_FAGGR(pdesc) == 1)); stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(pdesc)) @@ -395,8 +395,8 @@ static void _rtl_rx_process(struct ieee80211_hw *hw, struct sk_buff *skb) stats.rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(rxdesc); /* TODO: is center_freq changed when doing scan? */ /* TODO: Shall we add protection or just skip those two step? 
*/ - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(rxdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(rxdesc)) diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c index 941080e03c0..b8ec718a0fa 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c @@ -499,8 +499,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, && (GET_RX_DESC_FAGGR(pdesc) == 1)); stats->timestamp_low = GET_RX_DESC_TSFL(pdesc); stats->rx_is40Mhzpacket = (bool) GET_RX_DESC_BW(pdesc); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; if (GET_RX_DESC_CRC32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; if (!GET_RX_DESC_SWDEC(pdesc)) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c index 7b0a2e75b8b..0b074f11f96 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c @@ -538,8 +538,8 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, if (stats->hwerror) return false; - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; hdr = (struct ieee80211_hdr *)(skb->data + stats->rx_drvinfo_size + stats->rx_bufshift); diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c index ac081297db5..601261d67e8 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c @@ -304,8 +304,8 @@ bool rtl8723ae_rx_query_desc(struct ieee80211_hw *hw, status->is_cck = RTL8723E_RX_HAL_IS_CCK_RATE(status->rate); - rx_status->freq = hw->conf.channel->center_freq; - rx_status->band = hw->conf.channel->band; + rx_status->freq = hw->conf.chandef.chan->center_freq; + rx_status->band = hw->conf.chandef.chan->band; hdr = (struct ieee80211_hdr *)(skb->data + status->rx_drvinfo_size + status->rx_bufshift); diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index bbbf68cf50a..3291ffa9527 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -572,7 +572,8 @@ static int wl1251_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; int channel, ret = 0; - channel = ieee80211_frequency_to_channel(conf->channel->center_freq); + channel = ieee80211_frequency_to_channel( + conf->chandef.chan->center_freq); wl1251_debug(DEBUG_MAC80211, "mac80211 config ch %d psm %s power %d", channel, @@ -1223,7 +1224,7 @@ static int wl1251_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = SURVEY_INFO_NOISE_DBM; survey->noise = wl->noise; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index a9f7041c719..c26cb095010 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4474,7 +4474,7 @@ static int 
wl1271_op_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - survey->channel = conf->channel; + survey->channel = conf->chandef.chan; survey->filled = 0; return 0; } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 114364b5d46..c6208a7988e 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1156,10 +1156,10 @@ static int zd_op_config(struct ieee80211_hw *hw, u32 changed) struct ieee80211_conf *conf = &hw->conf; spin_lock_irq(&mac->lock); - mac->channel = conf->channel->hw_value; + mac->channel = conf->chandef.chan->hw_value; spin_unlock_irq(&mac->lock); - return zd_chip_set_channel(&mac->chip, conf->channel->hw_value); + return zd_chip_set_channel(&mac->chip, conf->chandef.chan->hw_value); } static void zd_beacon_done(struct zd_mac *mac) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 23a275a9a3b..64faf015dd1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -974,8 +974,7 @@ enum ieee80211_smps_mode { * @power_level: requested transmit power (in dBm), backward compatibility * value only that is set to the minimum of all interfaces * - * @channel: the channel to tune to - * @channel_type: the channel (HT) type + * @chandef: the channel definition to tune to * @radar_enabled: whether radar detection is enabled * * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame @@ -1001,8 +1000,7 @@ struct ieee80211_conf { u8 long_frame_max_tx_count, short_frame_max_tx_count; - struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type; + struct cfg80211_chan_def chandef; bool radar_enabled; enum ieee80211_smps_mode smps_mode; }; @@ -4216,31 +4214,33 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops); static inline bool conf_is_ht20(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT20; + return conf->chandef.width == NL80211_CHAN_WIDTH_20; } static inline bool conf_is_ht40_minus(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT40MINUS; + return conf->chandef.width == NL80211_CHAN_WIDTH_40 && + conf->chandef.center_freq1 < conf->chandef.chan->center_freq; } static inline bool conf_is_ht40_plus(struct ieee80211_conf *conf) { - return conf->channel_type == NL80211_CHAN_HT40PLUS; + return conf->chandef.width == NL80211_CHAN_WIDTH_40 && + conf->chandef.center_freq1 > conf->chandef.chan->center_freq; } static inline bool conf_is_ht40(struct ieee80211_conf *conf) { - return conf_is_ht40_minus(conf) || conf_is_ht40_plus(conf); + return conf->chandef.width == NL80211_CHAN_WIDTH_40; } static inline bool conf_is_ht(struct ieee80211_conf *conf) { - return conf->channel_type != NL80211_CHAN_NO_HT; + return conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT; } static inline enum nl80211_iftype diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 50aaf25d473..6e43feb49a7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -805,8 +805,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, IEEE80211_CHANCTX_EXCLUSIVE); } } else if (local->open_count == local->monitors) { - local->_oper_channel = chandef->chan; - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } @@ -3373,9 +3372,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (local->use_chanctx) *chandef = local->monitor_chandef; else - cfg80211_chandef_create(chandef, - 
local->_oper_channel, - local->_oper_channel_type); + *chandef = local->_oper_chandef; ret = 0; } rcu_read_unlock(); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 78c0d90dd64..8024874ba95 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -22,7 +22,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); if (!local->use_chanctx) { - local->_oper_channel_type = cfg80211_get_chandef_type(chandef); + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } } @@ -77,9 +77,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->mode = mode; if (!local->use_chanctx) { - local->_oper_channel_type = - cfg80211_get_chandef_type(chandef); - local->_oper_channel = chandef->chan; + local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } else { err = drv_add_chanctx(local, ctx); @@ -106,7 +104,10 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, WARN_ON_ONCE(ctx->refcount != 0); if (!local->use_chanctx) { - local->_oper_channel_type = NL80211_CHAN_NO_HT; + struct cfg80211_chan_def *chandef = &local->_oper_chandef; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = chandef->chan->center_freq; + chandef->center_freq2 = 0; ieee80211_hw_config(local, 0); } else { drv_remove_chanctx(local, ctx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c7f8b8b29e5..f9782f0f434 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1021,8 +1021,7 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *scan_sdata; struct ieee80211_channel *csa_channel; /* For backward compatibility only -- do not use */ - struct ieee80211_channel *_oper_channel; - enum nl80211_channel_type _oper_channel_type; + struct cfg80211_chan_def _oper_chandef; /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b0d28682186..a16b037c9d3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -95,42 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work) static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_channel *chan; + struct cfg80211_chan_def chandef = {}; u32 changed = 0; int power; - enum nl80211_channel_type channel_type; u32 offchannel_flag; offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; + if (local->scan_channel) { - chan = local->scan_channel; + chandef.chan = local->scan_channel; /* If scanning on oper channel, use whatever channel-type * is currently in use. 
*/ - if (chan == local->_oper_channel) - channel_type = local->_oper_channel_type; - else - channel_type = NL80211_CHAN_NO_HT; + if (chandef.chan == local->_oper_chandef.chan) { + chandef = local->_oper_chandef; + } else { + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } } else if (local->tmp_channel) { - chan = local->tmp_channel; - channel_type = NL80211_CHAN_NO_HT; - } else { - chan = local->_oper_channel; - channel_type = local->_oper_channel_type; - } - - if (chan != local->_oper_channel || - channel_type != local->_oper_channel_type) + chandef.chan = local->tmp_channel; + chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + chandef.center_freq1 = chandef.chan->center_freq; + } else + chandef = local->_oper_chandef; + + WARN(!cfg80211_chandef_valid(&chandef), + "control:%d MHz width:%d center: %d/%d MHz", + chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; - if (offchannel_flag || chan != local->hw.conf.channel || - channel_type != local->hw.conf.channel_type) { - local->hw.conf.channel = chan; - local->hw.conf.channel_type = channel_type; + if (offchannel_flag || + !cfg80211_chandef_identical(&local->hw.conf.chandef, + &local->_oper_chandef)) { + local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } @@ -146,7 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) changed |= IEEE80211_CONF_CHANGE_SMPS; } - power = chan->max_power; + power = chandef.chan->max_power; rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -740,11 +745,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) sband = local->hw.wiphy->bands[band]; if (!sband) continue; - if (!local->use_chanctx && !local->_oper_channel) { + if (!local->use_chanctx && !local->_oper_chandef.chan) { /* init channel we're on */ - local->hw.conf.channel = - local->_oper_channel = &sband->channels[0]; - local->hw.conf.channel_type = NL80211_CHAN_NO_HT; + struct cfg80211_chan_def chandef = { + .chan = &sband->channels[0], + .width = NL80211_CHAN_NO_HT, + .center_freq1 = sband->channels[0].center_freq, + .center_freq2 = 0 + }; + local->hw.conf.chandef = local->_oper_chandef = chandef; } cfg80211_chandef_create(&local->monitor_chandef, &sband->channels[0], diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9958cb7df8f..237e2ef42ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -988,6 +988,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (!ieee80211_sdata_running(sdata)) @@ -997,21 +998,30 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - sdata->local->_oper_channel = sdata->local->csa_channel; - if (!sdata->local->ops->channel_switch) { + /* + * FIXME: Here we are downgrading to NL80211_CHAN_WIDTH_20_NOHT + * and don't adjust our ht/vht settings + * This is wrong - we should behave according to the CSA params + */ + local->_oper_chandef.chan = local->csa_channel; + local->_oper_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + 
local->_oper_chandef.center_freq1 = + local->_oper_chandef.chan->center_freq; + local->_oper_chandef.center_freq2 = 0; + + if (!local->ops->channel_switch) { /* call "hw_config" only if doing sw channel switch */ - ieee80211_hw_config(sdata->local, - IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { /* update the device channel directly */ - sdata->local->hw.conf.channel = sdata->local->_oper_channel; + local->hw.conf.chandef = local->_oper_chandef; } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = sdata->local->_oper_channel; + ifmgd->associated->channel = local->_oper_chandef.chan; /* XXX: wait for a beacon first? */ - ieee80211_wake_queues_by_reason(&sdata->local->hw, + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); out: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index cb34cbbaa20..581764f92e1 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -384,7 +384,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, { int i; struct ieee80211_sub_if_data *sdata; - enum ieee80211_band band = local->hw.conf.channel->band; + enum ieee80211_band band = local->hw.conf.chandef.chan->band; u32 tx_flags; tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; @@ -401,7 +401,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - tx_flags, local->hw.conf.channel, true); + tx_flags, local->hw.conf.chandef.chan, true); /* * After sending probe requests, wait for probe responses @@ -467,7 +467,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0] == local->_oper_channel)) { + (req->channels[0] == local->_oper_chandef.chan)) { /* * If we are scanning only on the operating channel * then we do not need to stop normal activities diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d79e374e129..8286dcef228 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,27 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANDEF_ENTRY __field(u32, control_freq) \ - __field(u32, chan_width) \ - __field(u32, center_freq1) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ + __field(u32, chan_width) \ + __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan->center_freq; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan ? 
(c)->chan->center_freq : 0; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ __entry->center_freq2 = (c)->center_freq2; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" -#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 -#define CHANCTX_ENTRY CHANDEF_ENTRY \ - __field(u8, rx_chains_static) \ +#define CHANCTX_ENTRY CHANDEF_ENTRY \ + __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ - __entry->rx_chains_static = ctx->conf.rx_chains_static; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ + __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic #define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -286,8 +286,7 @@ TRACE_EVENT(drv_config, __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) - __field(int, center_freq) - __field(int, channel_type) + CHANDEF_ENTRY __field(int, smps) ), @@ -303,15 +302,13 @@ TRACE_EVENT(drv_config, local->hw.conf.long_frame_max_tx_count; __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; - __entry->center_freq = local->hw.conf.channel ? - local->hw.conf.channel->center_freq : 0; - __entry->channel_type = local->hw.conf.channel_type; + CHANDEF_ASSIGN(&local->hw.conf.chandef) __entry->smps = local->hw.conf.smps_mode; ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq + LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, + LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4a83d8dea84..aad0bf5d881 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1709,7 +1709,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (chanctx_conf) chan = chanctx_conf->def.chan; else if (!local->use_chanctx) - chan = local->_oper_channel; + chan = local->_oper_chandef.chan; else goto fail_rcu; @@ -1843,7 +1843,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * This is the exception! WDS style interfaces are prohibited * when channel contexts are in used so this must be valid */ - band = local->hw.conf.channel->band; + band = local->hw.conf.chandef.chan->band; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 90cc2b82869..1734cd21c7c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2171,8 +2171,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) /* currently not handled */ WARN_ON(1); else { - cfg80211_chandef_create(&chandef, local->hw.conf.channel, - local->hw.conf.channel_type); + chandef = local->hw.conf.chandef; cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); } } -- cgit v1.2.3-70-g09d2 From c5116e9d8d2de324f13a91fe5afc308cd6b0ca93 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Mar 2013 16:58:58 +0100 Subject: ssb: define more board types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. 
Linville --- include/linux/ssb/ssb.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 8b1322296fe..c64999fd166 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -340,13 +340,61 @@ enum ssb_bustype { #define SSB_BOARDVENDOR_DELL 0x1028 /* Dell */ #define SSB_BOARDVENDOR_HP 0x0E11 /* HP */ /* board_type */ +#define SSB_BOARD_BCM94301CB 0x0406 +#define SSB_BOARD_BCM94301MP 0x0407 +#define SSB_BOARD_BU4309 0x040A +#define SSB_BOARD_BCM94309CB 0x040B +#define SSB_BOARD_BCM4309MP 0x040C +#define SSB_BOARD_BU4306 0x0416 #define SSB_BOARD_BCM94306MP 0x0418 #define SSB_BOARD_BCM4309G 0x0421 #define SSB_BOARD_BCM4306CB 0x0417 -#define SSB_BOARD_BCM4309MP 0x040C +#define SSB_BOARD_BCM94306PC 0x0425 /* pcmcia 3.3v 4306 card */ +#define SSB_BOARD_BCM94306CBSG 0x042B /* with SiGe PA */ +#define SSB_BOARD_PCSG94306 0x042D /* with SiGe PA */ +#define SSB_BOARD_BU4704SD 0x042E /* with sdram */ +#define SSB_BOARD_BCM94704AGR 0x042F /* dual 11a/11g Router */ +#define SSB_BOARD_BCM94308MP 0x0430 /* 11a-only minipci */ +#define SSB_BOARD_BU4318 0x0447 +#define SSB_BOARD_CB4318 0x0448 +#define SSB_BOARD_MPG4318 0x0449 #define SSB_BOARD_MP4318 0x044A -#define SSB_BOARD_BU4306 0x0416 -#define SSB_BOARD_BU4309 0x040A +#define SSB_BOARD_SD4318 0x044B +#define SSB_BOARD_BCM94306P 0x044C /* with SiGe */ +#define SSB_BOARD_BCM94303MP 0x044E +#define SSB_BOARD_BCM94306MPM 0x0450 +#define SSB_BOARD_BCM94306MPL 0x0453 +#define SSB_BOARD_PC4303 0x0454 /* pcmcia */ +#define SSB_BOARD_BCM94306MPLNA 0x0457 +#define SSB_BOARD_BCM94306MPH 0x045B +#define SSB_BOARD_BCM94306PCIV 0x045C +#define SSB_BOARD_BCM94318MPGH 0x0463 +#define SSB_BOARD_BU4311 0x0464 +#define SSB_BOARD_BCM94311MC 0x0465 +#define SSB_BOARD_BCM94311MCAG 0x0466 +/* 4321 boards */ +#define SSB_BOARD_BU4321 0x046B +#define SSB_BOARD_BU4321E 0x047C +#define SSB_BOARD_MP4321 0x046C +#define SSB_BOARD_CB2_4321 0x046D +#define SSB_BOARD_CB2_4321_AG 0x0066 +#define SSB_BOARD_MC4321 0x046E +/* 4325 boards */ +#define SSB_BOARD_BCM94325DEVBU 0x0490 +#define SSB_BOARD_BCM94325BGABU 0x0491 +#define SSB_BOARD_BCM94325SDGWB 0x0492 +#define SSB_BOARD_BCM94325SDGMDL 0x04AA +#define SSB_BOARD_BCM94325SDGMDL2 0x04C6 +#define SSB_BOARD_BCM94325SDGMDL3 0x04C9 +#define SSB_BOARD_BCM94325SDABGWBA 0x04E1 +/* 4322 boards */ +#define SSB_BOARD_BCM94322MC 0x04A4 +#define SSB_BOARD_BCM94322USB 0x04A8 /* dualband */ +#define SSB_BOARD_BCM94322HM 0x04B0 +#define SSB_BOARD_BCM94322USB2D 0x04Bf /* single band discrete front end */ +/* 4312 boards */ +#define SSB_BOARD_BU4312 0x048A +#define SSB_BOARD_BCM4312MCGSG 0x04B5 /* chip_package */ #define SSB_CHIPPACK_BCM4712S 1 /* Small 200pin 4712 */ #define SSB_CHIPPACK_BCM4712M 2 /* Medium 225pin 4712 */ -- cgit v1.2.3-70-g09d2 From 3e6998574fde0ab7a3329c9229394dd80462ead2 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Mar 2013 16:58:59 +0100 Subject: bcma: define board types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using that IDs we can write workarounds for various cards Signed-off-by: Rafał Miłecki Signed-off-by: John W. 
Linville --- include/linux/bcma/bcma.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e0ce311011c..0ab6712fd76 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -173,6 +173,60 @@ struct bcma_host_ops { #define BCMA_CHIP_ID_BCM53572 53572 #define BCMA_PKG_ID_BCM47188 9 +/* Board types (on PCI usually equals to the subsystem dev id) */ +/* BCM4313 */ +#define BCMA_BOARD_TYPE_BCM94313BU 0X050F +#define BCMA_BOARD_TYPE_BCM94313HM 0X0510 +#define BCMA_BOARD_TYPE_BCM94313EPA 0X0511 +#define BCMA_BOARD_TYPE_BCM94313HMG 0X051C +/* BCM4716 */ +#define BCMA_BOARD_TYPE_BCM94716NR2 0X04CD +/* BCM43224 */ +#define BCMA_BOARD_TYPE_BCM943224X21 0X056E +#define BCMA_BOARD_TYPE_BCM943224X21_FCC 0X00D1 +#define BCMA_BOARD_TYPE_BCM943224X21B 0X00E9 +#define BCMA_BOARD_TYPE_BCM943224M93 0X008B +#define BCMA_BOARD_TYPE_BCM943224M93A 0X0090 +#define BCMA_BOARD_TYPE_BCM943224X16 0X0093 +#define BCMA_BOARD_TYPE_BCM94322X9 0X008D +#define BCMA_BOARD_TYPE_BCM94322M35E 0X008E +/* BCM43228 */ +#define BCMA_BOARD_TYPE_BCM943228BU8 0X0540 +#define BCMA_BOARD_TYPE_BCM943228BU9 0X0541 +#define BCMA_BOARD_TYPE_BCM943228BU 0X0542 +#define BCMA_BOARD_TYPE_BCM943227HM4L 0X0543 +#define BCMA_BOARD_TYPE_BCM943227HMB 0X0544 +#define BCMA_BOARD_TYPE_BCM943228HM4L 0X0545 +#define BCMA_BOARD_TYPE_BCM943228SD 0X0573 +/* BCM4331 */ +#define BCMA_BOARD_TYPE_BCM94331X19 0X00D6 +#define BCMA_BOARD_TYPE_BCM94331X28 0X00E4 +#define BCMA_BOARD_TYPE_BCM94331X28B 0X010E +#define BCMA_BOARD_TYPE_BCM94331PCIEBT3AX 0X00E4 +#define BCMA_BOARD_TYPE_BCM94331X12_2G 0X00EC +#define BCMA_BOARD_TYPE_BCM94331X12_5G 0X00ED +#define BCMA_BOARD_TYPE_BCM94331X29B 0X00EF +#define BCMA_BOARD_TYPE_BCM94331CSAX 0X00EF +#define BCMA_BOARD_TYPE_BCM94331X19C 0X00F5 +#define BCMA_BOARD_TYPE_BCM94331X33 0X00F4 +#define BCMA_BOARD_TYPE_BCM94331BU 0X0523 +#define BCMA_BOARD_TYPE_BCM94331S9BU 0X0524 +#define BCMA_BOARD_TYPE_BCM94331MC 0X0525 +#define BCMA_BOARD_TYPE_BCM94331MCI 0X0526 +#define BCMA_BOARD_TYPE_BCM94331PCIEBT4 0X0527 +#define BCMA_BOARD_TYPE_BCM94331HM 0X0574 +#define BCMA_BOARD_TYPE_BCM94331PCIEDUAL 0X059B +#define BCMA_BOARD_TYPE_BCM94331MCH5 0X05A9 +#define BCMA_BOARD_TYPE_BCM94331CS 0X05C6 +#define BCMA_BOARD_TYPE_BCM94331CD 0X05DA +/* BCM53572 */ +#define BCMA_BOARD_TYPE_BCM953572BU 0X058D +#define BCMA_BOARD_TYPE_BCM953572NR2 0X058E +#define BCMA_BOARD_TYPE_BCM947188NR2 0X058F +#define BCMA_BOARD_TYPE_BCM953572SDRNR2 0X0590 +/* BCM43142 */ +#define BCMA_BOARD_TYPE_BCM943142HM 0X05E0 + struct bcma_device { struct bcma_bus *bus; struct bcma_device_id id; -- cgit v1.2.3-70-g09d2 From 12bef78f0a806639daef58b1770be6ea19b2e94d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 21 Mar 2013 16:26:19 +0100 Subject: ssb: fix sprom constant for ant_available_{bg,a} This was done accordingly to new specs. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- include/linux/ssb/ssb_regs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 6ecfa02ddba..3a7256955b1 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -289,11 +289,11 @@ #define SSB_SPROM4_ETHPHY_ET1A_SHIFT 5 #define SSB_SPROM4_ETHPHY_ET0M (1<<14) /* MDIO for enet0 */ #define SSB_SPROM4_ETHPHY_ET1M (1<<15) /* MDIO for enet1 */ -#define SSB_SPROM4_ANTAVAIL 0x005D /* Antenna available bitfields */ -#define SSB_SPROM4_ANTAVAIL_A 0x00FF /* A-PHY bitfield */ -#define SSB_SPROM4_ANTAVAIL_A_SHIFT 0 -#define SSB_SPROM4_ANTAVAIL_BG 0xFF00 /* B-PHY and G-PHY bitfield */ -#define SSB_SPROM4_ANTAVAIL_BG_SHIFT 8 +#define SSB_SPROM4_ANTAVAIL 0x005C /* Antenna available bitfields */ +#define SSB_SPROM4_ANTAVAIL_BG 0x00FF /* B-PHY and G-PHY bitfield */ +#define SSB_SPROM4_ANTAVAIL_BG_SHIFT 0 +#define SSB_SPROM4_ANTAVAIL_A 0xFF00 /* A-PHY bitfield */ +#define SSB_SPROM4_ANTAVAIL_A_SHIFT 8 #define SSB_SPROM4_AGAIN01 0x005E /* Antenna Gain (in dBm Q5.2) */ #define SSB_SPROM4_AGAIN0 0x00FF /* Antenna 0 */ #define SSB_SPROM4_AGAIN0_SHIFT 0 -- cgit v1.2.3-70-g09d2 From c54419321455631079c7d6e60bc732dd0c5914c5 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 25 Mar 2013 14:49:35 +0000 Subject: GRE: Refactor GRE tunneling code. Following patch refactors GRE code into ip tunneling code and GRE specific code. Common tunneling code is moved to ip_tunnel module. ip_tunnel module is written as generic library which can be used by different tunneling implementations. ip_tunnel module contains following components: - packet xmit and rcv generic code. xmit flow looks like (gre_xmit/ipip_xmit)->ip_tunnel_xmit->ip_local_out. - hash table of all devices. - lookup for tunnel devices. - control plane operations like device create, destroy, ioctl, netlink operations code. - registration for tunneling modules, like gre, ipip etc. - define single pcpu_tstats dev->tstats. - struct tnl_ptk_info added to pass parsed tunnel packet parameters. ipip.h header is renamed to ip_tunnel.h Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 2 +- include/net/gre.h | 51 ++ include/net/ip6_tunnel.h | 1 + include/net/ip_tunnels.h | 177 ++++++ include/net/ipip.h | 84 --- net/ipv4/Kconfig | 5 + net/ipv4/Makefile | 1 + net/ipv4/af_inet.c | 1 - net/ipv4/gre.c | 5 - net/ipv4/ip_gre.c | 1504 ++++++++++------------------------------------ net/ipv4/ip_tunnel.c | 1035 +++++++++++++++++++++++++++++++ net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/af_inet6.c | 1 - net/ipv6/ip6_gre.c | 1 + net/ipv6/ip6_tunnel.c | 1 + net/ipv6/sit.c | 2 +- 18 files changed, 1594 insertions(+), 1283 deletions(-) create mode 100644 include/net/ip_tunnels.h delete mode 100644 include/net/ipip.h create mode 100644 net/ipv4/ip_tunnel.c (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 33427fd6251..fe9ea7d1495 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/net/gre.h b/include/net/gre.h index 82665474bcb..9f03a390c82 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -2,6 +2,7 @@ #define __LINUX_GRE_H #include +#include #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 @@ -12,7 +13,57 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ebdef7f6086..4da5de10d1d 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -3,6 +3,7 @@ #include #include +#include #include #define IP6TUNNEL_ERR_TIMEO (30*HZ) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h new file mode 100644 index 00000000000..4b6f0b28f41 --- /dev/null +++ b/include/net/ip_tunnels.h @@ -0,0 +1,177 @@ +#ifndef __NET_IP_TUNNELS_H +#define __NET_IP_TUNNELS_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#endif + +/* Keep error state on tunnel for 30 sec */ +#define IPTUNNEL_ERR_TIMEO (30*HZ) + +/* 6rd prefix/relay information */ +#ifdef CONFIG_IPV6_SIT_6RD +struct ip_tunnel_6rd_parm { + struct in6_addr prefix; + __be32 relay_prefix; + u16 prefixlen; + u16 relay_prefixlen; +}; +#endif + +struct ip_tunnel_prl_entry { + struct ip_tunnel_prl_entry __rcu *next; + __be32 addr; + u16 flags; + struct 
rcu_head rcu_head; +}; + +struct ip_tunnel { + struct ip_tunnel __rcu *next; + struct hlist_node hash_node; + struct net_device *dev; + + int err_count; /* Number of arrived ICMP errors */ + unsigned long err_time; /* Time when the last ICMP error + * arrived */ + + /* These four fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated header length */ + int mlink; + + struct ip_tunnel_parm parms; + + /* for SIT */ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd_parm ip6rd; +#endif + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ + unsigned int prl_count; /* # of entries in PRL */ + int ip_tnl_net_id; + struct gro_cells gro_cells; +}; + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) + +struct tnl_ptk_info { + __be16 flags; + __be16 proto; + __be32 key; + __be32 seq; +}; + +#define PACKET_RCVD 0 +#define PACKET_REJECT 1 + +#define IP_TNL_HASH_BITS 10 +#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) + +struct ip_tunnel_net { + struct hlist_head *tunnels; + struct net_device *fb_tunnel_dev; +}; + +int ip_tunnel_init(struct net_device *dev); +void ip_tunnel_uninit(struct net_device *dev); +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params); +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); + +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key); + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error); +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +void ip_tunnel_setup(struct net_device *dev, int net_id); + +/* Extract dsfield from inner protocol */ +static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_get_dsfield((const struct ipv6hdr *)iph); + else + return 0; +} + +/* Propogate ECN bits out */ +static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, + const struct sk_buff *skb) +{ + u8 inner = ip_tunnel_get_dsfield(iph, skb); + + return INET_ECN_encapsulate(tos, inner); +} + +static inline void tunnel_ip_select_ident(struct sk_buff *skb, + const struct iphdr *old_iph, + struct dst_entry *dst) +{ + struct iphdr *iph = ip_hdr(skb); + + /* Use inner packet iph-id if possible. 
*/ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); +} + +static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err; + int pkt_len = skb->len - skb_transport_offset(skb); + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + + nf_reset(skb); + + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } +} +#endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ipip.h b/include/net/ipip.h deleted file mode 100644 index 483b91a10bb..00000000000 --- a/include/net/ipip.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef __NET_IPIP_H -#define __NET_IPIP_H 1 - -#include -#include -#include - -/* Keep error state on tunnel for 30 sec */ -#define IPTUNNEL_ERR_TIMEO (30*HZ) - -/* 6rd prefix/relay information */ -struct ip_tunnel_6rd_parm { - struct in6_addr prefix; - __be32 relay_prefix; - u16 prefixlen; - u16 relay_prefixlen; -}; - -struct ip_tunnel { - struct ip_tunnel __rcu *next; - struct net_device *dev; - - int err_count; /* Number of arrived ICMP errors */ - unsigned long err_time; /* Time when the last ICMP error arrived */ - - /* These four fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ - int mlink; - - struct ip_tunnel_parm parms; - - /* for SIT */ -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd_parm ip6rd; -#endif - struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ - unsigned int prl_count; /* # of entries in PRL */ - - struct gro_cells gro_cells; -}; - -struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry __rcu *next; - __be32 addr; - u16 flags; - struct rcu_head rcu_head; -}; - -static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) -{ - int err; - int pkt_len = skb->len - skb_transport_offset(skb); - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - nf_reset(skb); - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } -} - -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} -#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df76845..2073226a8a6 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -186,9 +186,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. 
+config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604..089cb9f3638 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70b2d4cf580..93824c57b10 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,7 +111,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710c4cd..d2d5a99fba0 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2e94289a17e..ad662e906f7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. */ @@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. 
*/ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. */ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 
- ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } - - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + struct iphdr *iph = ip_hdr(skb); + struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - if (cand != NULL) - return cand; + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } - return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel *t) -{ - return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 
0; - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipgre_bucket(ign, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + *hdr_len += 4; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; } } -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, - struct ip_tunnel_parm *parms, - int type) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - int link = parms->link; - struct ip_tunnel *t; - struct ip_tunnel __rcu **tp; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - for (tp = __ipgre_bucket(ign, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && - key == t->parms.i_key && - link == t->parms.link && - type == t->dev->type) - break; - - return t; -} - -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - struct ip_tunnel *t, *nt; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); - if (t || !create) - return t; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "gre%d"); - - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); - if (!dev) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - dev->rtnl_link_ops = &ipgre_link_ops; - dev->mtu = ipgre_tunnel_bind_dev(dev); - - if (register_netdevice(dev) < 0) - goto failed_free; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); - return nt; - -failed_free: - free_netdev(dev); - return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - ipgre_tunnel_unlink(ign, netdev_priv(dev)); - dev_put(dev); + return 0; } - static void ipgre_err(struct sk_buff *skb, u32 info) { -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. + /* All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. - Moreover, Cisco "wise men" put GRE key to the third word - in GRE header. It makes impossible maintaining even soft state for keyed - GRE tunnels with enabled checksum. Tell them "thank you". - - Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standards established - by themselves??? - */ + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. 
It makes impossible maintaining even soft + state for keyed GRE tunnels with enabled checksum. Tell + them "thank you". + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standards established + by themselves??? + */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - __be32 key = 0; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { + if (!csum_err) /* ignore csum errors. */ return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - if (flags & GRE_KEY) - key = *(((__be32 *)p) + (grehlen / 4) - 1); - switch (type) { default: case ICMP_PARAMETERPROB: @@ -548,8 +274,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags, key, p[1]); + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->daddr, iph->saddr, tpi.key); if (t == NULL) return; @@ -578,158 +309,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ - u8 inner = 0; - if (skb->protocol == htons(ETH_P_IP)) - inner = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - return INET_ECN_encapsulate(tos, inner); -} - static int ipgre_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; - int err; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - if (!pskb_may_pull(skb, 16)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) goto drop; - iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. 
- */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); - gre_proto = *(__be16 *)(h + 2); + iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->saddr, iph->daddr, tpi.key); - tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, flags, key, - gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; - - secpath_reset(skb); - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - tunnel->dev->stats.multicast++; - skb->pkt_type = PACKET_BROADCAST; - } -#endif - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! 
*/ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - gro_cells_receive(&tunnel->gro_cells, skb); + ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - drop: kfree_skb(skb); return 0; @@ -746,7 +352,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; return skb; } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&GRE_CSUM) { + tunnel->parms.o_flags&TUNNEL_CSUM) { err = skb_checksum_help(skb); if (unlikely(err)) goto error; @@ -760,480 +366,157 @@ error: return ERR_PTR(err); } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + int hdr_len) { - struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph; - const struct iphdr *tiph; - struct flowi4 fl4; - u8 tos; - __be16 df; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - int gre_hlen; - __be32 dst; - int mtu; - u8 ttl; - int err; - - skb = handle_offloads(tunnel, skb); - if (IS_ERR(skb)) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } + struct gre_base_hdr *greh; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb_push(skb, hdr_len); - old_iph = ip_hdr(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; - if (dev->type == ARPHRD_ETHER) - IPCB(skb)->flags = 0; + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (dev->header_ops && dev->type == ARPHRD_IPGRE) { - gre_hlen = 0; - tiph = (const struct iphdr *)skb->data; - } else { - gre_hlen = tunnel->hlen; - tiph = &tunnel->parms.iph; - } - - if ((dst = tiph->daddr) == 0) { - /* NBMA tunnel */ - - if (skb_dst(skb) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; } - - if (skb->protocol == htons(ETH_P_IP)) { - rt = skb_rtable(skb); - dst = rt_nexthop(rt, old_iph->daddr); + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - const struct in6_addr *addr6; - struct neighbour *neigh; - bool do_tx_error_icmp; - int addr_type; - - neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) - goto tx_error; - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = 
ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - do_tx_error_icmp = true; - else { - do_tx_error_icmp = false; - dst = addr6->s6_addr32[3]; - } - neigh_release(neigh); - if (do_tx_error_icmp) - goto tx_error_icmp; + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *(__sum16 *)ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); } -#endif - else - goto tx_error; } - ttl = tiph->ttl; - tos = tiph->tos; - if (tos & 0x1) { - tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) - tos = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - } + return skb; +} - rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } - tdev = rt->dst.dev; +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - df = tiph->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (old_iph->frag_off&htons(IP_DF)); + tpi.flags = tunnel->parms.o_flags; + tpi.proto = proto; + tpi.key = tunnel->parms.o_key; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + tpi.seq = htonl(tunnel->o_seqno); - if (!skb_is_gso(skb) && - (old_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + /* Push GRE header. 
*/ + skb = gre_build_header(skb, &tpi, tunnel->hlen); + if (unlikely(!skb)) { + dev->stats.tx_dropped++; + return; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - if (!skb_is_gso(skb) && - mtu >= IPV6_MIN_MTU && - mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif + ip_tunnel_xmit(skb, dev, tnl_params); +} - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tnl_params; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - /* Warning : tiph value might point to freed memory */ - } + if (dev->header_ops) { + /* Need space for new headers */ + if (skb_cow_head(skb, dev->needed_headroom - + (tunnel->hlen + sizeof(struct iphdr)))); + goto free_skb; - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*iph)); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ + tnl_params = (const struct iphdr *)skb->data; - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_GRE; - iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - - tunnel_ip_select_ident(skb, old_iph, &rt->dst); - - if (ttl == 0) { - if (skb->protocol == htons(ETH_P_IP)) - iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif - else - iph->ttl = ip4_dst_hoplimit(&rt->dst); - } - - ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing + * to gre header. 
+ */ + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + } else { + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - /* Skip GRE checksum if skb is getting offloaded. */ - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && - (tunnel->parms.o_flags&GRE_CSUM)) { - int offset = skb_transport_offset(skb); - - if (skb_has_shared_frag(skb)) { - err = __skb_linearize(skb); - if (err) - goto tx_error; - } - - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, - skb->len - offset, - 0)); - } + tnl_params = &tunnel->parms.iph; } - iptunnel_xmit(skb, dev); + __gre_xmit(skb, dev, tnl_params, skb->protocol); + return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: - dst_link_failure(skb); -#endif -tx_error: - dev->stats.tx_errors++; +free_skb: dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; return NETDEV_TX_OK; } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, + struct net_device *dev) { - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - int hlen = LL_MAX_HEADER; - int mtu = ETH_DATA_LEN; - int addend = sizeof(struct iphdr) + 4; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - /* Guess output device to choose reasonable mtu and needed_headroom */ - - if (iph->daddr) { - struct flowi4 fl4; - struct rtable *rt; - - rt = ip_route_output_gre(dev_net(dev), &fl4, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - - if (dev->type != ARPHRD_ETHER) - dev->flags |= IFF_POINTOPOINT; - } + struct ip_tunnel *tunnel = netdev_priv(dev); - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - if (tdev) { - hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; - } - dev->iflink = tunnel->parms.link; - - /* Precalculate GRE options length */ - if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (tunnel->parms.o_flags&GRE_CSUM) - addend += 4; - if (tunnel->parms.o_flags&GRE_KEY) - addend += 4; - if (tunnel->parms.o_flags&GRE_SEQ) - addend += 4; - } - dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len + addend; + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (mtu < 68) - mtu = 68; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - tunnel->hlen = addend; - /* TCP offload with GRE SEQ is not supported. 
*/ - if (!(tunnel->parms.o_flags & GRE_SEQ)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + return NETDEV_TX_OK; - return mtu; +free_skb: + dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipgre_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - if (!(p.i_flags&GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags&GRE_KEY)) - p.o_key = 0; - - t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - unsigned int nflags = 0; - - t = netdev_priv(dev); - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { - err = -EINVAL; - break; - } - ipgre_tunnel_unlink(ign, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - t->parms.o_key = p.o_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - } - if (t) { - err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - if (t->parms.link != p.link) { - t->parms.link = p.link; - dev->mtu = ipgre_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ign->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t == netdev_priv(ign->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - - default: - err = -EINVAL; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { + return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); + p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: - return err; -} + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) - return -EINVAL; - dev->mtu = new_mtu; + p.i_flags = tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; return 0; } @@ -1263,25 +546,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) ... ftp fec0:6666:6666::193.233.7.65 ... - */ - static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); - struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(iph+1); + struct iphdr *iph; + struct gre_base_hdr *greh; - memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); - p[0] = t->parms.o_flags; - p[1] = htons(type); + iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + greh = (struct gre_base_hdr *)(iph+1); + greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->protocol = htons(type); - /* - * Set the source hardware address. - */ + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + /* Set the source hardware address. 
*/ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) @@ -1289,7 +570,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (iph->daddr) return t->hlen; - return -t->hlen; + return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1343,31 +624,21 @@ static int ipgre_close(struct net_device *dev) } return 0; } - #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, - .ndo_uninit = ipgre_tunnel_uninit, + .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif - .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipgre_dev_free(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - - gro_cells_destroy(&tunnel->gro_cells); - free_percpu(dev->tstats); - free_netdev(dev); -} - #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -1376,35 +647,49 @@ static void ipgre_dev_free(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; - dev->destructor = ipgre_dev_free; + ip_tunnel_setup(dev, ipgre_net_id); +} - dev->type = ARPHRD_IPGRE; - dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->parms.iph.protocol = IPPROTO_GRE; + + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->flags = IFF_NOARP; dev->iflink = 0; - dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - dev->features |= GRE_FEATURES; + dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported. 
*/ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } } static int ipgre_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - struct iphdr *iph; - int err; + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; + __gre_tunnel_init(dev); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + memcpy(dev->dev_addr, &iph->saddr, 4); + memcpy(dev->broadcast, &iph->daddr, 4); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + dev->type = ARPHRD_IPGRE; + dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->addr_len = 4; if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1418,106 +703,30 @@ static int ipgre_tunnel_init(struct net_device *dev) } else dev->header_ops = &ipgre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&tunnel->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } - - return 0; + return ip_tunnel_init(dev); } -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_GRE; - iph->ihl = 5; - tunnel->hlen = sizeof(struct iphdr) + 4; - - dev_hold(dev); -} - - static const struct gre_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ - int prio; - - for (prio = 0; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ign->tunnels[prio][h]); - - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipgre_init_net(struct net *net) { - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int err; - - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", - ipgre_tunnel_setup); - if (!ign->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ign->fb_tunnel_dev, net); - - ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - - if ((err = register_netdev(ign->fb_tunnel_dev))) - goto err_reg_dev; - - rcu_assign_pointer(ign->tunnels_wc[0], - netdev_priv(ign->fb_tunnel_dev)); - return 0; - -err_reg_dev: - ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: - return err; + return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_net(struct net *net) { - struct ipgre_net *ign; - LIST_HEAD(list); - - ign = net_generic(net, ipgre_net_id); - rtnl_lock(); - ipgre_destroy_tunnels(ign, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, .id = &ipgre_net_id, - .size = sizeof(struct ipgre_net), + .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1562,8 +771,8 @@ out: return ipgre_tunnel_validate(tb, data); } -static 
void ipgre_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -1576,10 +785,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1603,148 +812,46 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - ipgre_tunnel_bind_dev(dev); + __gre_tunnel_init(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; + return ip_tunnel_init(dev); } -static const struct net_device_ops ipgre_tap_netdev_ops = { - .ndo_init = ipgre_tap_init, - .ndo_uninit = ipgre_tunnel_uninit, - .ndo_start_xmit = ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { + .ndo_init = gre_tap_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) { - ether_setup(dev); - - dev->netdev_ops = &ipgre_tap_netdev_ops; - dev->destructor = ipgre_dev_free; - - dev->iflink = 0; - dev->features |= NETIF_F_NETNS_LOCAL; - - dev->features |= GRE_FEATURES; - dev->hw_features |= GRE_FEATURES; + dev->netdev_ops = &gre_tap_netdev_ops; + ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int mtu; - int err; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &nt->parms); - - if (ipgre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - eth_hw_addr_random(dev); - - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - err = register_netdevice(dev); - if (err) - goto out; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); + struct ip_tunnel_parm p; -out: - return err; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ip_tunnel_parm p; - int mtu; - - if (dev == 
ign->fb_tunnel_dev) - return -EINVAL; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &p); - - t = ipgre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - - if (dev->type != ARPHRD_ETHER) { - unsigned int nflags = 0; - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; - } - ipgre_tunnel_unlink(ign, t); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - } - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - - t->parms.o_key = p.o_key; - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - - if (t->parms.link != p.link) { - t->parms.link = p.link; - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - netdev_state_change(dev); - } - - return 0; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1779,8 +886,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1818,6 +925,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; @@ -1831,13 +939,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; -/* - * And now the modules code and kernel interface. 
- */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ + struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); + ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { + .init = ipgre_tap_init_net, + .exit = ipgre_tap_exit_net, + .id = &gre_tap_net_id, + .size = sizeof(struct ip_tunnel_net), +}; static int __init ipgre_init(void) { @@ -1849,6 +972,10 @@ static int __init ipgre_init(void) if (err < 0) return err; + err = register_pernet_device(&ipgre_tap_net_ops); + if (err < 0) + goto pnet_tap_faied; + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); @@ -1863,16 +990,17 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; -out: - return err; + return 0; tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_faied: unregister_pernet_device(&ipgre_net_ops); - goto out; + return err; } static void __exit ipgre_fini(void) @@ -1881,6 +1009,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) pr_info("%s: can't remove protocol\n", __func__); + unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } @@ -1890,3 +1019,4 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 00000000000..9d96b6853f2 --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, + __be32 key, __be32 remote) +{ + return hash_32((__force u32)key ^ (__force u32)remote, + IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & TUNNEL_KEY) { + if (flags & TUNNEL_KEY) + return key == p->i_key; + else + /* key expected, none present */ + return false; + } else + return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, no key, no options + + Tunnel hash table: + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + Given src, dst and key, find appropriate for input tunnel. 
+*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key) +{ + unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; + + hash = ip_tunnel_hash(itn, key, remote); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else + cand = t; + } + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + hash = ip_tunnel_hash(itn, key, 0); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + if (flags & TUNNEL_NO_KEY) + goto skip_key_lookup; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + +skip_key_lookup: + if (cand) + return cand; + + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) + return netdev_priv(itn->fb_tunnel_dev); + + + return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + unsigned int h; + __be32 remote; + + if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) + remote = parms->iph.daddr; + else + remote = 0; + + h = ip_tunnel_hash(itn, parms->i_key, remote); + return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ + struct hlist_head *head = ip_bucket(itn, &t->parms); + + hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ + hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms, + int type) +{ + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + } + return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, + const struct rtnl_link_ops *ops, + struct ip_tunnel_parm *parms) +{ + int err; + struct ip_tunnel *tunnel; + struct net_device *dev; + char name[IFNAMSIZ]; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else { + if (strlen(ops->kind) + 3 >= IFNAMSIZ) { + err = -E2BIG; + goto failed; + } + strlcpy(name, ops->kind, IFNAMSIZ); + strncat(name, "%d", 2); + } + + ASSERT_RTNL(); + dev = alloc_netdev(ops->priv_size, name, ops->setup); + if (!dev) { + err = -ENOMEM; + goto failed; + } + dev_net_set(dev, net); + + dev->rtnl_link_ops = ops; + + tunnel = 
netdev_priv(dev); + tunnel->parms = *parms; + + err = register_netdevice(dev); + if (err) + goto failed_free; + + return dev; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, + struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = ETH_DATA_LEN; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + iph = &tunnel->parms.iph; + + /* Guess output device to choose reasonable mtu and needed_headroom */ + if (iph->daddr) { + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + if (!IS_ERR(rt)) { + tdev = rt->dst.dev; + ip_rt_put(rt); + } + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + dev->needed_headroom = t_hlen + hlen; + mtu -= (dev->hard_header_len + t_hlen); + + if (mtu < 68) + mtu = 68; + + return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, + struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + struct ip_tunnel *nt, *fbt; + struct net_device *dev; + + BUG_ON(!itn->fb_tunnel_dev); + fbt = netdev_priv(itn->fb_tunnel_dev); + dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + if (IS_ERR(dev)) + return NULL; + + dev->mtu = ip_tunnel_bind_dev(dev); + + nt = netdev_priv(dev); + ip_tunnel_add(itn, nt); + return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ + struct pcpu_tstats *tstats; + const struct iphdr *iph = ip_hdr(skb); + int err; + + secpath_reset(skb); + + skb->protocol = tpi->proto; + + skb->mac_header = skb->network_header; + __pskb_pull(skb, tunnel->hlen); + skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(iph->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + tunnel->dev->stats.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || + ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags&TUNNEL_SEQ) { + if (!(tpi->flags&TUNNEL_SEQ) || + (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + + /* Warning: All skb pointers will be invalidated! 
*/ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *inner_iph; + struct iphdr *iph; + struct flowi4 fl4; + u8 tos, ttl; + __be16 df; + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ + __be32 dst; + int mtu; + + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + dst = tnl_params->daddr; + if (dst == 0) { + /* NBMA tunnel */ + + if (skb_dst(skb) == NULL) { + dev->stats.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == htons(ETH_P_IP)) { + rt = skb_rtable(skb); + dst = rt_nexthop(rt, inner_iph->daddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; + int addr_type; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (neigh == NULL) + goto tx_error; + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) + goto tx_error_icmp; + } +#endif + else + goto tx_error; + } + + tos = tnl_params->tos; + if (tos & 0x1) { + tos &= ~0x1; + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + dst, tnl_params->saddr, + tunnel->parms.o_key, + RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + df = tnl_params->frag_off; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr); + else + mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + df |= (inner_iph->frag_off&htons(IP_DF)); + + if (!skb_is_gso(skb) && + (inner_iph->frag_off&htons(IP_DF)) && + mtu < ntohs(inner_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < skb->len) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + ttl = tnl_params->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + + rt->dst.header_len; + if (max_headroom > dev->needed_headroom) { + dev->needed_headroom = max_headroom; + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; + } + } + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. 
*/ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = tnl_params->protocol; + iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + + iptunnel_xmit(skb, dev); + return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: + dst_link_failure(skb); +#endif +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, + struct ip_tunnel *t, + struct net_device *dev, + struct ip_tunnel_parm *p, + bool set_mtu) +{ + ip_tunnel_del(t); + t->parms.iph.saddr = p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->iph.saddr, 4); + memcpy(dev->broadcast, &p->iph.daddr, 4); + } + ip_tunnel_add(itn, t); + + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; + + if (t->parms.link != p->link) { + int mtu; + + t->parms.link = p->link; + mtu = ip_tunnel_bind_dev(dev); + if (set_mtu) + dev->mtu = mtu; + } + netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + int err = 0; + struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + BUG_ON(!itn->fb_tunnel_dev); + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == itn->fb_tunnel_dev) + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + if (!(p->i_flags&TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags&TUNNEL_KEY)) + p->o_key = 0; + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + + if (!t && (cmd == SIOCADDTUNNEL)) + t = ip_tunnel_create(net, itn, p); + + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + + t = netdev_priv(dev); + } + } + + if (t) { + err = 0; + ip_tunnel_update(itn, t, dev, p, true); + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == itn->fb_tunnel_dev) { + err = -ENOENT; + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(itn->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); + free_percpu(dev->tstats); + free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (itn->fb_tunnel_dev != dev) { + ip_tunnel_del(netdev_priv(dev)); + unregister_netdevice_queue(dev, head); + } +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname) +{ + struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); + struct ip_tunnel_parm parms; + + itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!itn->tunnels) + return -ENOMEM; + + if (!ops) { + itn->fb_tunnel_dev = NULL; + return 0; + } + memset(&parms, 0, sizeof(parms)); + if (devname) + strlcpy(parms.name, devname, IFNAMSIZ); + + rtnl_lock(); + itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + rtnl_unlock(); + if (IS_ERR(itn->fb_tunnel_dev)) { + kfree(itn->tunnels); + return PTR_ERR(itn->fb_tunnel_dev); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ + int h; + + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { + struct ip_tunnel *t; + struct hlist_node *n; + struct hlist_head *thead = &itn->tunnels[h]; + + hlist_for_each_entry_safe(t, n, thead, hash_node) + unregister_netdevice_queue(t->dev, head); + } + if (itn->fb_tunnel_dev) + unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ + LIST_HEAD(list); + + rtnl_lock(); + ip_tunnel_destroy(itn, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ip_tunnel_net *itn; + int mtu; + int err; + + nt = netdev_priv(dev); + itn = net_generic(net, nt->ip_tnl_net_id); + + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + + nt->parms = *p; + err = register_netdevice(dev); + if (err) + goto out; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + mtu = ip_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + ip_tunnel_add(itn, nt); + +out: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int 
ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (dev == itn->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + + t = ip_tunnel_find(itn, p, dev->type); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + if (dev->type != ARPHRD_ETHER) { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } + } + + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; + int err; + + dev->destructor = ip_tunnel_dev_free; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + iph->version = 4; + iph->ihl = 5; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + /* fb_tunnel_dev will be unregisted in net-exit call. */ + if (itn->fb_tunnel_dev != dev) + ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do least required initialization, rest of init is done in tunnel_init call */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233c0ac..6a628fb3349 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 34e006fe2d8..a557d6ab127 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,7 +111,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f95b3aa579..fd61fe16679 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f56277f1590..ab5c7ad482c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6a6ba73ff26..df89ccaacea 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bef3fedfdc5..1e55866cead 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 898e671a526..ee4fc570cf2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 
+49,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 6752c8db8e0cfedb44ba62806dd15b383ed64000 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Mon, 25 Mar 2013 08:26:16 +0000 Subject: firewire net, ipv4 arp: Extend hardware address and remove driver-level packet inspection. Inspection of an upper layer protocol is considered harmful, especially if it is ARP or another stateful upper layer protocol; the driver cannot (and should not) hold their full state. The IPv4 over Firewire module used to inspect ARP (both in the sending path and in the receiving path) and record the peer's GUID, max packet size, max speed and fifo address. This patch removes such inspection by extending our "hardware address" definition to include other information as well: max packet size, max speed and fifo. By doing this, the neighbour module in the networking subsystem can cache them. Note: As we have started ignoring sspd and max_rec in ARP/NDP, that information will not be used in the driver when sending. When a packet is being sent, the IP layer fills our pseudo header with the extended "hardware address", including GUID and fifo. The driver can look up the node-id (the real but rather volatile low-level address) by GUID, and then the module can send the packet to the wire using the parameters provided in the extended hardware address. This approach is realistic because IP over IEEE1394 (RFC2734) and IPv6 over IEEE1394 (RFC3146) share the same "hardware address" format in their address resolution protocols. Here, the extended "hardware address" is defined as follows: union fwnet_hwaddr { u8 u[16]; struct { __be64 uniq_id; /* EUI-64 */ u8 max_rec; /* max packet size */ u8 sspd; /* max speed */ __be16 fifo_hi; /* hi 16bits of FIFO addr */ __be32 fifo_lo; /* lo 32bits of FIFO addr */ } __packed uc; }; Note that the hardware address is declared as a union, so that we can map a full IP address into it when implementing MCAP (Multicast Channel Allocation Protocol) for IPv6, but the IP and ARP subsystems do not need to know this format in detail. One difference between the original ARP (RFC826) and 1394 ARP (RFC2734) is that 1394 ARP Request/Reply packets do not contain the target hardware address field (aka ar$tha). This difference is handled in the ARP subsystem. CC: Stephan Gatzka Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S.
Miller --- drivers/firewire/net.c | 153 +++++++++---------------------------------------- include/linux/if_arp.h | 12 +++- include/net/firewire.h | 25 ++++++++ net/ipv4/arp.c | 27 +++++++-- 4 files changed, 82 insertions(+), 135 deletions(-) create mode 100644 include/net/firewire.h (limited to 'include') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index cb8aa865ff8..790017eb505 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -28,6 +28,7 @@ #include #include +#include /* rx limits */ #define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ @@ -57,33 +58,6 @@ #define RFC2374_HDR_LASTFRAG 2 /* last fragment */ #define RFC2374_HDR_INTFRAG 3 /* interior fragment */ -#define RFC2734_HW_ADDR_LEN 16 - -struct rfc2734_arp { - __be16 hw_type; /* 0x0018 */ - __be16 proto_type; /* 0x0806 */ - u8 hw_addr_len; /* 16 */ - u8 ip_addr_len; /* 4 */ - __be16 opcode; /* ARP Opcode */ - /* Above is exactly the same format as struct arphdr */ - - __be64 s_uniq_id; /* Sender's 64bit EUI */ - u8 max_rec; /* Sender's max packet size */ - u8 sspd; /* Sender's max speed */ - __be16 fifo_hi; /* hi 16bits of sender's FIFO addr */ - __be32 fifo_lo; /* lo 32bits of sender's FIFO addr */ - __be32 sip; /* Sender's IP Address */ - __be32 tip; /* IP Address of requested hw addr */ -} __packed; - -/* This header format is specific to this driver implementation. */ -#define FWNET_ALEN 8 -#define FWNET_HLEN 10 -struct fwnet_header { - u8 h_dest[FWNET_ALEN]; /* destination address */ - __be16 h_proto; /* packet type ID field */ -} __packed; - static bool fwnet_hwaddr_is_multicast(u8 *ha) { return !!(*ha & 1); @@ -196,8 +170,6 @@ struct fwnet_peer { struct list_head peer_link; struct fwnet_device *dev; u64 guid; - u64 fifo; - __be32 ip; /* guarded by dev->lock */ struct list_head pd_list; /* received partial datagrams */ @@ -226,6 +198,15 @@ struct fwnet_packet_task { u8 enqueued; }; +/* + * Get fifo address embedded in hwaddr + */ +static __u64 fwnet_hwaddr_fifo(union fwnet_hwaddr *ha) +{ + return (u64)get_unaligned_be16(&ha->uc.fifo_hi) << 32 + | get_unaligned_be32(&ha->uc.fifo_lo); +} + /* * saddr == NULL means use device source address. * daddr == NULL means leave destination address (eg unresolved arp). @@ -518,7 +499,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, bool is_broadcast, u16 ether_type) { struct fwnet_device *dev; - static const __be64 broadcast_hw = cpu_to_be64(~0ULL); int status; __be64 guid; @@ -537,76 +517,11 @@ static int fwnet_finish_incoming_packet(struct net_device *net, /* * Parse the encapsulation header. This actually does the job of - * converting to an ethernet frame header, as well as arp - * conversion if needed. ARP conversion is easier in this - * direction, since we are using ethernet as our backend. - */ - /* - * If this is an ARP packet, convert it. First, we want to make - * use of some of the fields, since they tell us a little bit - * about the sending machine. + * converting to an ethernet-like pseudo frame header. 
*/ - if (ether_type == ETH_P_ARP) { - struct rfc2734_arp *arp1394; - struct arphdr *arp; - unsigned char *arp_ptr; - u64 fifo_addr; - u64 peer_guid; - struct fwnet_peer *peer; - unsigned long flags; - - arp1394 = (struct rfc2734_arp *)skb->data; - arp = (struct arphdr *)skb->data; - arp_ptr = (unsigned char *)(arp + 1); - peer_guid = get_unaligned_be64(&arp1394->s_uniq_id); - fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32 - | get_unaligned_be32(&arp1394->fifo_lo); - - spin_lock_irqsave(&dev->lock, flags); - peer = fwnet_peer_find_by_guid(dev, peer_guid); - if (peer) { - peer->fifo = fifo_addr; - peer->ip = arp1394->sip; - } - spin_unlock_irqrestore(&dev->lock, flags); - - if (!peer) { - dev_notice(&net->dev, - "no peer for ARP packet from %016llx\n", - (unsigned long long)peer_guid); - goto no_peer; - } - - /* - * Now that we're done with the 1394 specific stuff, we'll - * need to alter some of the data. Believe it or not, all - * that needs to be done is sender_IP_address needs to be - * moved, the destination hardware address get stuffed - * in and the hardware address length set to 8. - * - * IMPORTANT: The code below overwrites 1394 specific data - * needed above so keep the munging of the data for the - * higher level IP stack last. - */ - - arp->ar_hln = 8; - /* skip over sender unique id */ - arp_ptr += arp->ar_hln; - /* move sender IP addr */ - put_unaligned(arp1394->sip, (u32 *)arp_ptr); - /* skip over sender IP addr */ - arp_ptr += arp->ar_pln; - - if (arp->ar_op == htons(ARPOP_REQUEST)) - memset(arp_ptr, 0, sizeof(u64)); - else - memcpy(arp_ptr, net->dev_addr, sizeof(u64)); - } - - /* Now add the ethernet header. */ guid = cpu_to_be64(dev->card->guid); if (dev_hard_header(skb, net, ether_type, - is_broadcast ? &broadcast_hw : &guid, + is_broadcast ? 
net->broadcast : net->dev_addr, NULL, skb->len) >= 0) { struct fwnet_header *eth; u16 *rawp; @@ -649,7 +564,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, return 0; - no_peer: err: net->stats.rx_errors++; net->stats.rx_dropped++; @@ -1355,11 +1269,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) ptask->dest_node = IEEE1394_ALL_NODES; ptask->speed = SCODE_100; } else { - __be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest); + union fwnet_hwaddr *ha = (union fwnet_hwaddr *)hdr_buf.h_dest; + __be64 guid = get_unaligned(&ha->uc.uniq_id); u8 generation; peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); - if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) + if (!peer) goto fail; generation = peer->generation; @@ -1367,32 +1282,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload = peer->max_payload; datagram_label_ptr = &peer->datagram_label; - ptask->fifo_addr = peer->fifo; + ptask->fifo_addr = fwnet_hwaddr_fifo(ha); ptask->generation = generation; ptask->dest_node = dest_node; ptask->speed = peer->speed; } - /* If this is an ARP packet, convert it */ - if (proto == htons(ETH_P_ARP)) { - struct arphdr *arp = (struct arphdr *)skb->data; - unsigned char *arp_ptr = (unsigned char *)(arp + 1); - struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data; - __be32 ipaddr; - - ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN)); - - arp1394->hw_addr_len = RFC2734_HW_ADDR_LEN; - arp1394->max_rec = dev->card->max_receive; - arp1394->sspd = dev->card->link_speed; - - put_unaligned_be16(dev->local_fifo >> 32, - &arp1394->fifo_hi); - put_unaligned_be32(dev->local_fifo & 0xffffffff, - &arp1394->fifo_lo); - put_unaligned(ipaddr, &arp1394->sip); - } - ptask->hdr.w0 = 0; ptask->hdr.w1 = 0; ptask->skb = skb; @@ -1507,8 +1402,6 @@ static int fwnet_add_peer(struct fwnet_device *dev, peer->dev = dev; peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; - peer->fifo = FWNET_NO_FIFO_ADDR; - peer->ip = 0; INIT_LIST_HEAD(&peer->pd_list); peer->pdg_size = 0; peer->datagram_label = 0; @@ -1538,6 +1431,7 @@ static int fwnet_probe(struct device *_dev) struct fwnet_device *dev; unsigned max_mtu; int ret; + union fwnet_hwaddr *ha; mutex_lock(&fwnet_device_mutex); @@ -1582,8 +1476,15 @@ static int fwnet_probe(struct device *_dev) net->mtu = min(1500U, max_mtu); /* Set our hardware address while we're at it */ - put_unaligned_be64(card->guid, net->dev_addr); - put_unaligned_be64(~0ULL, net->broadcast); + ha = (union fwnet_hwaddr *)net->dev_addr; + put_unaligned_be64(card->guid, &ha->uc.uniq_id); + ha->uc.max_rec = dev->card->max_receive; + ha->uc.sspd = dev->card->link_speed; + put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi); + put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo); + + memset(net->broadcast, -1, net->addr_len); + ret = register_netdev(net); if (ret) goto out; @@ -1632,8 +1533,6 @@ static int fwnet_remove(struct device *_dev) mutex_lock(&fwnet_device_mutex); net = dev->netdev; - if (net && peer->ip) - arp_invalidate(net, peer->ip); fwnet_remove_peer(peer, dev); diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 89b4614a472..f563907ed77 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -33,7 +33,15 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) static inline int arp_hdr_len(struct net_device *dev) { - /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
 */ - return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + /* ARP header, device address and 2 IP addresses */ + return sizeof(struct arphdr) + dev->addr_len + sizeof(u32) * 2; +#endif + default: + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + } } #endif /* _LINUX_IF_ARP_H */ diff --git a/include/net/firewire.h b/include/net/firewire.h new file mode 100644 index 00000000000..31bcbfe7a22 --- /dev/null +++ b/include/net/firewire.h @@ -0,0 +1,25 @@ +#ifndef _NET_FIREWIRE_H +#define _NET_FIREWIRE_H + +/* Pseudo L2 address */ +#define FWNET_ALEN 16 +union fwnet_hwaddr { + u8 u[FWNET_ALEN]; + /* "Hardware address" defined in RFC2734/RF3146 */ + struct { + __be64 uniq_id; /* EUI-64 */ + u8 max_rec; /* max packet size */ + u8 sspd; /* max speed */ + __be16 fifo_hi; /* hi 16bits of FIFO addr */ + __be32 fifo_lo; /* lo 32bits of FIFO addr */ + } __packed uc; +}; + +/* Pseudo L2 Header */ +#define FWNET_HLEN 18 +struct fwnet_header { + u8 h_dest[FWNET_ALEN]; /* destination address */ + __be16 h_proto; /* packet type ID field */ +} __packed; + +#endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929f620..247ec1951c3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast -- cgit v1.2.3-70-g09d2 From a88b9ce5ad4fc633b159b37d3ed8af60a4008fbc Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Mon, 25 Mar 2013 19:04:05 +0000 Subject: netlink: remove duplicated NLMSG_ALIGN NLMSG_HDRLEN is already an aligned value and can be referenced directly without extra alignment. The redundant alignment here may confuse API users. Signed-off-by: Hong Zhiguo Acked-by: Thomas Graf Signed-off-by: David S.
Miller --- include/uapi/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 78d5b8a546d..32a354f67ba 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -78,7 +78,7 @@ struct nlmsghdr { #define NLMSG_ALIGNTO 4U #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) -#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN)) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) #define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ -- cgit v1.2.3-70-g09d2 From 5203cd28db6dc05c3618a602cf4cf81203d00257 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 26 Mar 2013 23:11:21 +0000 Subject: net: core: introduce skb_probe_transport_header() Sometimes we need to probe and set the transport header for packets (e.g. from an untrusted source). This patch introduces a new helper skb_probe_transport_header() which tries to probe and set the L4 header through skb_flow_dissect(); if that fails, it just sets the transport header to the hint passed by the caller. Cc: Eric Dumazet Signed-off-by: Jason Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 497412165b1..fa88b966cb8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include #include #include +#include /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 @@ -1559,6 +1560,19 @@ static inline void skb_set_transport_header(struct sk_buff *skb, skb->transport_header += offset; } +static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return; + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, offset_hint); +} + static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; -- cgit v1.2.3-70-g09d2 From 1b5ab0def4f6e42e8b8097c3b11d2e8d96baafec Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 27 Mar 2013 05:55:56 +0000 Subject: net: use the frag lru_lock to protect netns_frags.nqueues update Move the protection of netns_frags.nqueues updates under the LRU lock instead of the write lock. They are located on the same cacheline, and this is also needed when transitioning to per-hash-bucket locking. Signed-off-by: Jesper Dangaard Brouer Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/inet_frag.h | 2 ++ net/ipv4/inet_fragment.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 64b4e7d23b8..7cac9c5789b 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -143,6 +143,7 @@ static inline void inet_frag_lru_del(struct inet_frag_queue *q) { spin_lock(&q->net->lru_lock); list_del(&q->lru_list); + q->net->nqueues--; spin_unlock(&q->net->lru_lock); } @@ -151,6 +152,7 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, { spin_lock(&nf->lru_lock); list_add_tail(&q->lru_list, &nf->lru_list); + q->net->nqueues++; spin_unlock(&nf->lru_lock); } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 8ba548a8efc..1206ca64b0e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -124,7 +124,6 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - fq->net->nqueues--; write_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -260,7 +259,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - nf->nqueues++; write_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; -- cgit v1.2.3-70-g09d2 From d6b688cf2f7ca3e168acc73597f4d7102ae663fa Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 27 Mar 2013 17:23:10 +0100 Subject: bcma: handle more devices in bcma_pmu_get_alp_clock() Add some more chip IDs to bcma_pmu_get_alp_clock() Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon_pmu.c | 24 ++++++++++++++++++++---- include/linux/bcma/bcma_driver_chipcommon.h | 1 + 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index 7e88fffaf3f..edca73af3cc 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -174,19 +174,35 @@ u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc) struct bcma_bus *bus = cc->core->bus; switch (bus->chipinfo.id) { + case BCMA_CHIP_ID_BCM4313: + case BCMA_CHIP_ID_BCM43224: + case BCMA_CHIP_ID_BCM43225: + case BCMA_CHIP_ID_BCM43227: + case BCMA_CHIP_ID_BCM43228: + case BCMA_CHIP_ID_BCM4331: + case BCMA_CHIP_ID_BCM43421: + case BCMA_CHIP_ID_BCM43428: + case BCMA_CHIP_ID_BCM43431: case BCMA_CHIP_ID_BCM4716: - case BCMA_CHIP_ID_BCM4748: case BCMA_CHIP_ID_BCM47162: - case BCMA_CHIP_ID_BCM4313: - case BCMA_CHIP_ID_BCM5357: + case BCMA_CHIP_ID_BCM4748: case BCMA_CHIP_ID_BCM4749: + case BCMA_CHIP_ID_BCM5357: case BCMA_CHIP_ID_BCM53572: + case BCMA_CHIP_ID_BCM6362: /* always 20Mhz */ return 20000 * 1000; - case BCMA_CHIP_ID_BCM5356: case BCMA_CHIP_ID_BCM4706: + case BCMA_CHIP_ID_BCM5356: /* always 25Mhz */ return 25000 * 1000; + case BCMA_CHIP_ID_BCM43460: + case BCMA_CHIP_ID_BCM4352: + case BCMA_CHIP_ID_BCM4360: + if (cc->status & BCMA_CC_CHIPST_4360_XTAL_40MZ) + return 40000 * 1000; + else + return 20000 * 1000; default: bcma_warn(bus, "No ALP clock specified for %04X device, pmu rev. 
%d, using default %d Hz\n", bus->chipinfo.id, cc->pmu.rev, BCMA_CC_PMU_ALP_CLOCK); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 8390c474f69..1db4c6de372 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -104,6 +104,7 @@ #define BCMA_CC_CHIPST_4706_MIPS_BENDIAN BIT(3) /* 0: little, 1: big endian */ #define BCMA_CC_CHIPST_4706_PCIE1_DISABLE BIT(5) /* PCIE1 enable strap pin */ #define BCMA_CC_CHIPST_5357_NAND_BOOT BIT(4) /* NAND boot, valid for CC rev 38 and/or BCM5357 */ +#define BCMA_CC_CHIPST_4360_XTAL_40MZ 0x00000001 #define BCMA_CC_JCMD 0x0030 /* Rev >= 10 only */ #define BCMA_CC_JCMD_START 0x80000000 #define BCMA_CC_JCMD_BUSY 0x80000000 -- cgit v1.2.3-70-g09d2 From 6951618b4b0bb022429ab17d49f2fa3650f21cb4 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 27 Mar 2013 17:23:11 +0100 Subject: bcma: export bcma_chipco_get_alp_clock() This function will be used by brcmsmac. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon.c | 3 ++- include/linux/bcma/bcma_driver_chipcommon.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index 28fa50ad87b..88db0cb7bf1 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -25,13 +25,14 @@ static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, return value; } -static u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) +u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) { if (cc->capabilities & BCMA_CC_CAP_PMU) return bcma_pmu_get_alp_clock(cc); return 20000000; } +EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock); static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc) { diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 1db4c6de372..453fcc91468 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -608,6 +608,8 @@ void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); +extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); + void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask); -- cgit v1.2.3-70-g09d2 From fbbdb8f096e0e5d8244e1ffa46e364146ab9a440 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 27 Mar 2013 16:46:06 +0000 Subject: net: fix compile error of implicit declaration of skb_probe_transport_header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 40893fd (net: switch to use skb_probe_transport_header()) accidentally introduces a new error.
When NET_SKBUFF_DATA_USES_OFFSET is not enabled, the following compile error happens: CC net/packet/af_packet.o net/packet/af_packet.c: In function ‘packet_sendmsg_spkt’: net/packet/af_packet.c:1516:2: error: implicit declaration of function ‘skb_probe_transport_header’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors make[2]: *** [net/packet/af_packet.o] Error 1 make[1]: *** [net/packet] Error 2 make: *** [net] Error 2 As skb_probe_transport_header() does not seem to be related to NET_SKBUFF_DATA_USES_OFFSET, we should move the definition of skb_probe_transport_header() out of the scope of the NET_SKBUFF_DATA_USES_OFFSET macro. Cc: Jason Wang Cc: Eric Dumazet Signed-off-by: Ying Xue Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fa88b966cb8..878e0ee8106 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1560,19 +1560,6 @@ static inline void skb_set_transport_header(struct sk_buff *skb, skb->transport_header += offset; } -static inline void skb_probe_transport_header(struct sk_buff *skb, - const int offset_hint) -{ - struct flow_keys keys; - - if (skb_transport_header_was_set(skb)) - return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); - else - skb_set_transport_header(skb, offset_hint); -} - static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; @@ -1716,6 +1703,19 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return; + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, offset_hint); +} + static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { -- cgit v1.2.3-70-g09d2 From f3d4039242af92a9d93dee2fd9ae47066b20ca29 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 27 Mar 2013 10:52:28 +0000 Subject: tokenring: delete last holdout of CONFIG_TR Tokenring support was deleted in v3.5. One last holdout of the macro CONFIG_TR escaped that fate. Until now. Signed-off-by: Paul Bolle Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 56e3e066527..1dbb02c9894 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -144,8 +144,6 @@ static inline bool dev_xmit_complete(int rc) # else # define LL_MAX_HEADER 96 # endif -#elif IS_ENABLED(CONFIG_TR) -# define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 #endif -- cgit v1.2.3-70-g09d2 From e5c5d22e8dcf7c2d430336cbf8e180bd38e8daf1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 28 Mar 2013 13:38:25 +0900 Subject: net: add ETH_P_802_3_MIN Add a new constant ETH_P_802_3_MIN, the minimum ethernet type for an 802.3 frame. Frames with a lower value in the ethernet type field are Ethernet II. Also update all the users of this value that David Miller and I could find to use the new constant. Also correct a bug in util.c. The comparison with ETH_P_802_3_MIN should be >= not >. As suggested by Jesse Gross.
Compile tested only. Cc: David Miller Cc: Jesse Gross Cc: Karsten Keil Cc: John W. Linville Cc: Johannes Berg Cc: Bart De Schuymer Cc: Stephen Hemminger Cc: Patrick McHardy Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Cc: linux-bluetooth@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: linux-wireless@vger.kernel.org Cc: linux1394-devel@lists.sourceforge.net Cc: linux-media@vger.kernel.org Cc: netdev@vger.kernel.org Cc: dev@openvswitch.org Acked-by: Mauro Carvalho Chehab Acked-by: Stefan Richter Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/firewire/net.c | 2 +- drivers/isdn/i4l/isdn_net.c | 2 +- drivers/media/dvb-core/dvb_net.c | 10 +++++----- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/plip/plip.c | 2 +- drivers/net/wireless/ray_cs.c | 2 +- include/linux/if_vlan.h | 2 +- include/uapi/linux/if_ether.h | 3 +++ net/atm/lec.h | 2 +- net/bluetooth/bnep/netdev.c | 2 +- net/bridge/netfilter/ebtables.c | 2 +- net/ethernet/eth.c | 2 +- net/mac80211/tx.c | 2 +- net/openvswitch/datapath.c | 2 +- net/openvswitch/flow.c | 6 +++--- net/wireless/util.c | 2 +- 16 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 56796330a16..4d565365e47 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -547,7 +547,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, if (memcmp(eth->h_dest, net->dev_addr, net->addr_len)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) { + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) { protocol = eth->h_proto; } else { rawp = (u16 *)skb->data; diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index babc621a07f..88d657dff47 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1385,7 +1385,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev) if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 44225b186f6..83a23afb13a 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -185,7 +185,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb, skb->pkt_type=PACKET_MULTICAST; } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; @@ -228,9 +228,9 @@ static int ule_test_sndu( struct dvb_net_priv *p ) static int ule_bridged_sndu( struct dvb_net_priv *p ) { struct ethhdr *hdr = (struct ethhdr*) p->ule_next_hdr; - if(ntohs(hdr->h_proto) < 1536) { + if(ntohs(hdr->h_proto) < ETH_P_802_3_MIN) { int framelen = p->ule_sndu_len - ((p->ule_next_hdr+sizeof(struct ethhdr)) - p->ule_skb->data); - /* A frame Type < 1536 for a bridged frame, introduces a LLC Length field. */ + /* A frame Type < ETH_P_802_3_MIN for a bridged frame, introduces a LLC Length field. */ if(framelen != ntohs(hdr->h_proto)) { return -1; } @@ -320,7 +320,7 @@ static int handle_ule_extensions( struct dvb_net_priv *p ) (int) p->ule_sndu_type, l, total_ext_len); #endif - } while (p->ule_sndu_type < 1536); + } while (p->ule_sndu_type < ETH_P_802_3_MIN); return total_ext_len; } @@ -712,7 +712,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } /* Handle ULE Extension Headers. 
*/ - if (priv->ule_sndu_type < 1536) { + if (priv->ule_sndu_type < ETH_P_802_3_MIN) { /* There is an extension header. Handle it accordingly. */ int l = handle_ule_extensions(priv); if (l < 0) { diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index e4c1c88e4c2..95cff98d8a3 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6618,7 +6618,7 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr, (len << TXHDR_LEN_SHIFT) | ((l3off / 2) << TXHDR_L3START_SHIFT) | (ihl << TXHDR_IHL_SHIFT) | - ((eth_proto_inner < 1536) ? TXHDR_LLC : 0) | + ((eth_proto_inner < ETH_P_802_3_MIN) ? TXHDR_LLC : 0) | ((eth_proto == ETH_P_8021Q) ? TXHDR_VLAN : 0) | (ipv6 ? TXHDR_IP_VER : 0) | csum_bits); diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index bed62d9c53c..1f7bef90b46 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -560,7 +560,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev) * so don't forget to remove it. */ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 4775b5d172d..ebada812b3a 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -953,7 +953,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, unsigned char *data, int len) { __be16 proto = ((struct ethhdr *)data)->h_proto; - if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */ + if (ntohs(proto) >= ETH_P_802_3_MIN) { /* DIX II ethernet frame */ pr_debug("ray_cs translate_frame DIX II\n"); /* Copy LLC header to card buffer */ memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc)); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 218a3b686d9..70962f3fdb7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -339,7 +339,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { + if (ntohs(proto) >= ETH_P_802_3_MIN) { skb->protocol = proto; return; } diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 798032d0111..ade07f1c491 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -94,6 +94,9 @@ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value + * then the frame is Ethernet II. Else it is 802.3 */ + /* * Non DIX types. Won't clash for 1500 types. */ diff --git a/net/atm/lec.h b/net/atm/lec.h index a86aff9a3c0..4149db1b788 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -58,7 +58,7 @@ struct lane2_ops { * field in h_type field. Data follows immediately after header. * 2. LLC Data frames whose total length, including LLC field and data, * but not padding required to meet the minimum data frame length, - * is less than 1536(0x0600) MUST be encoded by placing that length + * is less than ETH_P_802_3_MIN MUST be encoded by placing that length * in the h_type field. The LLC field follows header immediately. * 3. LLC data frames longer than this maximum MUST be encoded by placing * the value 0 in the h_type field. 
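To make the pattern this series consolidates concrete, here is an illustrative sketch (not taken from the patch) of the classification that all of these call sites perform; example_classify is a hypothetical helper, and the constant is the ETH_P_802_3_MIN definition added above.

/*
 * Hypothetical helper, for illustration only: a value of at least
 * ETH_P_802_3_MIN (0x0600, i.e. 1536) in the EtherType/length field means
 * the frame is Ethernet II and the field carries a protocol number; a
 * smaller value is an 802.3 length, so the payload starts with LLC framing.
 */
#include <linux/if_ether.h>	/* struct ethhdr, ETH_P_802_3_MIN, ETH_P_802_2 */
#include <asm/byteorder.h>	/* ntohs(), htons() */

static __be16 example_classify(const struct ethhdr *eth)
{
	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
		return eth->h_proto;		/* DIX/Ethernet II protocol */

	return htons(ETH_P_802_2);		/* 802.3 length + LLC */
}
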
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b32589..4b488ec2610 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb) struct ethhdr *eh = (void *) skb->data; u16 proto = ntohs(eh->h_proto); - if (proto >= 1536) + if (proto >= ETH_P_802_3_MIN) return proto; if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d493c91a56..3d110c4fc78 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a36c85eab5b..5359560926b 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (netdev_uses_trailer_tags(dev)) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8914d2d2881..4e8a86163fc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2085,7 +2085,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d61cd997180..8759265a3e4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -681,7 +681,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. 
*/ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e4215c73..33248683934 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= 1536) + if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= 1536) + if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) return llc->ethertype; return htons(ETH_P_802_2); @@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < 1536) + if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); } else { diff --git a/net/wireless/util.c b/net/wireless/util.c index 37a56ee1e1e..6cbac99ae03 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype > 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; -- cgit v1.2.3-70-g09d2 From a3f109bd793dfe5c611220ca5ab6c72f1aed479e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 28 Mar 2013 11:51:31 +0000 Subject: sh_eth: add R-Car support for real Commit d0418bb7123f44b23d69ac349eec7daf9103472f (net: sh_eth: Add eth support for R8A7779 device) was a failed attempt to add support for one of members of the R-Car SoC family. That's for three reasons: it treated R8A7779 the same as SH7724 except including quite dirty hack adding ECMR_ELB bit to the mask in sh_eth_set_rate() while not removing ECMR_RTM bit (despite it's reserved in R-Car Ether), and it didn't add a new register offset array despite the closest SH_ETH_REG_FAST_SH4 mapping differs by 0x200 to the offsets all the R-Car Ether registers have, and also some of the registers in this old mapping don't exist on R-Car Ether (due to this, SH7724's 'sh_eth_my_cpu_data' structure is not adequeate for R-Car too). Fix all these shortcomings, restoring the SH7724 related section to its pristine state... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 107 +++++++++++++++++++++++++++++++--- include/linux/sh_eth.h | 1 + 2 files changed, 100 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 13abe917cbd..da604059b14 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2,7 +2,8 @@ * SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu - * Copyright (C) 2008-2012 Renesas Solutions Corp. + * Copyright (C) 2008-2013 Renesas Solutions Corp. + * Copyright (C) 2013 Cogent Embedded, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -147,6 +148,51 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWALCR1] = 0x00b4, }; +static const u16 sh_eth_offset_fast_rcar[SH_ETH_MAX_REGISTER_OFFSET] = { + [ECMR] = 0x0300, + [RFLR] = 0x0308, + [ECSR] = 0x0310, + [ECSIPR] = 0x0318, + [PIR] = 0x0320, + [PSR] = 0x0328, + [RDMLR] = 0x0340, + [IPGR] = 0x0350, + [APR] = 0x0354, + [MPR] = 0x0358, + [RFCF] = 0x0360, + [TPAUSER] = 0x0364, + [TPAUSECR] = 0x0368, + [MAHR] = 0x03c0, + [MALR] = 0x03c8, + [TROCR] = 0x03d0, + [CDCR] = 0x03d4, + [LCCR] = 0x03d8, + [CNDCR] = 0x03dc, + [CEFCR] = 0x03e4, + [FRECR] = 0x03e8, + [TSFRCR] = 0x03ec, + [TLFRCR] = 0x03f0, + [RFCR] = 0x03f4, + [MAFCR] = 0x03f8, + + [EDMR] = 0x0200, + [EDTRR] = 0x0208, + [EDRRR] = 0x0210, + [TDLAR] = 0x0218, + [RDLAR] = 0x0220, + [EESR] = 0x0228, + [EESIPR] = 0x0230, + [TRSCER] = 0x0238, + [RMFCR] = 0x0240, + [TFTR] = 0x0248, + [FDR] = 0x0250, + [RMCR] = 0x0258, + [TFUCR] = 0x0264, + [RFOCR] = 0x0268, + [FCFTR] = 0x0270, + [TRIMD] = 0x027c, +}; + static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = { [ECMR] = 0x0100, [RFLR] = 0x0108, @@ -296,7 +342,7 @@ static void sh_eth_select_mii(struct net_device *ndev) #endif /* There is CPU dependent code */ -#if defined(CONFIG_CPU_SUBTYPE_SH7724) || defined(CONFIG_ARCH_R8A7779) +#if defined(CONFIG_ARCH_R8A7779) #define SH_ETH_RESET_DEFAULT 1 static void sh_eth_set_duplex(struct net_device *ndev) { @@ -311,18 +357,60 @@ static void sh_eth_set_duplex(struct net_device *ndev) static void sh_eth_set_rate(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned int bits = ECMR_RTM; -#if defined(CONFIG_ARCH_R8A7779) - bits |= ECMR_ELB; -#endif + switch (mdp->speed) { + case 10: /* 10BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR); + break; + case 100:/* 100BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR); + break; + default: + break; + } +} + +/* R8A7779 */ +static struct sh_eth_cpu_data sh_eth_my_cpu_data = { + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate, + + .ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD, + .ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP, + .eesipr_value = 0x01ff009f, + + .tx_check = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO, + .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RDE | + EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, + .tx_error_check = EESR_TWB | EESR_TABT | EESR_TDE | EESR_TFE, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, +}; +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +#define SH_ETH_RESET_DEFAULT 1 +static void sh_eth_set_duplex(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + if (mdp->duplex) /* Full */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR); + else /* Half */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); +} + +static void sh_eth_set_rate(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); switch (mdp->speed) { case 10: /* 10BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR); break; case 100:/* 100BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR); break; default: break; @@ -2521,6 +2609,9 @@ static const u16 
*sh_eth_get_register_offset(int register_type) case SH_ETH_REG_GIGABIT: reg_offset = sh_eth_offset_gigabit; break; + case SH_ETH_REG_FAST_RCAR: + reg_offset = sh_eth_offset_fast_rcar; + break; case SH_ETH_REG_FAST_SH4: reg_offset = sh_eth_offset_fast_sh4; break; diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index b17d765ded8..fc305713fc6 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -6,6 +6,7 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; enum { SH_ETH_REG_GIGABIT, + SH_ETH_REG_FAST_RCAR, SH_ETH_REG_FAST_SH4, SH_ETH_REG_FAST_SH3_SH2 }; -- cgit v1.2.3-70-g09d2 From 14b57a10553b5b768f77b247e6dd285c65816064 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 29 Mar 2013 14:46:51 +0100 Subject: openvswitch: Use ETH_ALEN to define ethernet addresses Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 67d6c7b0358..8b9d7217edd 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -20,6 +20,7 @@ #define _LINUX_OPENVSWITCH_H 1 #include +#include /** * struct ovs_header - header for OVS Generic Netlink messages. @@ -269,8 +270,8 @@ enum ovs_frag_type { #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) struct ovs_key_ethernet { - __u8 eth_src[6]; - __u8 eth_dst[6]; + __u8 eth_src[ETH_ALEN]; + __u8 eth_dst[ETH_ALEN]; }; struct ovs_key_ipv4 { @@ -316,14 +317,14 @@ struct ovs_key_arp { __be32 arp_sip; __be32 arp_tip; __be16 arp_op; - __u8 arp_sha[6]; - __u8 arp_tha[6]; + __u8 arp_sha[ETH_ALEN]; + __u8 arp_tha[ETH_ALEN]; }; struct ovs_key_nd { __u32 nd_target[4]; - __u8 nd_sll[6]; - __u8 nd_tll[6]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; }; /** -- cgit v1.2.3-70-g09d2 From 22e3880a76bb9a0c4fa5c8fefdc8697a36a4dae1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 29 Mar 2013 14:46:52 +0100 Subject: openvswitch: Expose to userspace It contains the public netlink interface bits required by userspace to make use of the interface. Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/linux/openvswitch.h | 433 +------------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/openvswitch.h | 456 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 458 insertions(+), 432 deletions(-) create mode 100644 include/uapi/linux/openvswitch.h (limited to 'include') diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 8b9d7217edd..e6b240b6196 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -19,437 +19,6 @@ #ifndef _LINUX_OPENVSWITCH_H #define _LINUX_OPENVSWITCH_H 1 -#include -#include - -/** - * struct ovs_header - header for OVS Generic Netlink messages. - * @dp_ifindex: ifindex of local port for datapath (0 to make a request not - * specific to a datapath). - * - * Attributes following the header are specific to a particular OVS Generic - * Netlink family, but all of the OVS families use this header. - */ - -struct ovs_header { - int dp_ifindex; -}; - -/* Datapaths. */ - -#define OVS_DATAPATH_FAMILY "ovs_datapath" -#define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 - -enum ovs_datapath_cmd { - OVS_DP_CMD_UNSPEC, - OVS_DP_CMD_NEW, - OVS_DP_CMD_DEL, - OVS_DP_CMD_GET, - OVS_DP_CMD_SET -}; - -/** - * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. 
- * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local - * port". This is the name of the network device whose dp_ifindex is given in - * the &struct ovs_header. Always present in notifications. Required in - * %OVS_DP_NEW requests. May be used as an alternative to specifying - * dp_ifindex in other requests (with a dp_ifindex of 0). - * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially - * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on - * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should - * not be sent. - * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the - * datapath. Always present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_DP_* commands. - */ -enum ovs_datapath_attr { - OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ - __OVS_DP_ATTR_MAX -}; - -#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) - -struct ovs_dp_stats { - __u64 n_hit; /* Number of flow table matches. */ - __u64 n_missed; /* Number of flow table misses. */ - __u64 n_lost; /* Number of misses not sent to userspace. */ - __u64 n_flows; /* Number of flows present */ -}; - -struct ovs_vport_stats { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ -}; - -/* Fixed logical ports. */ -#define OVSP_LOCAL ((__u32)0) - -/* Packet transfer. */ - -#define OVS_PACKET_FAMILY "ovs_packet" -#define OVS_PACKET_VERSION 0x1 - -enum ovs_packet_cmd { - OVS_PACKET_CMD_UNSPEC, - - /* Kernel-to-user notifications. */ - OVS_PACKET_CMD_MISS, /* Flow table miss. */ - OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ - - /* Userspace commands. */ - OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ -}; - -/** - * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. - * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire - * packet as received, from the start of the Ethernet header onward. For - * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by - * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is - * the flow key extracted from the packet as originally received. - * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key - * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows - * userspace to adapt its flow setup strategy by comparing its notion of the - * flow key against the kernel's. - * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used - * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. - * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION - * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an - * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content - * specified there. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_PACKET_* commands. 
- */ -enum ovs_packet_attr { - OVS_PACKET_ATTR_UNSPEC, - OVS_PACKET_ATTR_PACKET, /* Packet data. */ - OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ - OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ - __OVS_PACKET_ATTR_MAX -}; - -#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) - -/* Virtual ports. */ - -#define OVS_VPORT_FAMILY "ovs_vport" -#define OVS_VPORT_MCGROUP "ovs_vport" -#define OVS_VPORT_VERSION 0x1 - -enum ovs_vport_cmd { - OVS_VPORT_CMD_UNSPEC, - OVS_VPORT_CMD_NEW, - OVS_VPORT_CMD_DEL, - OVS_VPORT_CMD_GET, - OVS_VPORT_CMD_SET -}; - -enum ovs_vport_type { - OVS_VPORT_TYPE_UNSPEC, - OVS_VPORT_TYPE_NETDEV, /* network device */ - OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ - __OVS_VPORT_TYPE_MAX -}; - -#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) - -/** - * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. - * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. - * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type - * of vport. - * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device - * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes - * plus a null terminator. - * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on - * this port. A value of zero indicates that upcalls should not be sent. - * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for - * packets sent or received through the vport. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_VPORT_* commands. - * - * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and - * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is - * optional; if not specified a free port number is automatically selected. - * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type - * of vport. - * and other attributes are ignored. - * - * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to - * look up the vport to operate on; otherwise dp_idx from the &struct - * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. - */ -enum ovs_vport_attr { - OVS_VPORT_ATTR_UNSPEC, - OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ - OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ - OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ - OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ - OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ - __OVS_VPORT_ATTR_MAX -}; - -#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) - -/* Flows. */ - -#define OVS_FLOW_FAMILY "ovs_flow" -#define OVS_FLOW_MCGROUP "ovs_flow" -#define OVS_FLOW_VERSION 0x1 - -enum ovs_flow_cmd { - OVS_FLOW_CMD_UNSPEC, - OVS_FLOW_CMD_NEW, - OVS_FLOW_CMD_DEL, - OVS_FLOW_CMD_GET, - OVS_FLOW_CMD_SET -}; - -struct ovs_flow_stats { - __u64 n_packets; /* Number of matched packets. */ - __u64 n_bytes; /* Number of matched bytes. */ -}; - -enum ovs_key_attr { - OVS_KEY_ATTR_UNSPEC, - OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ - OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ - OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ - OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ - OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ - OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ - OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ - OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ - OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ - OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ - OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ - OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ - OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ - OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ - __OVS_KEY_ATTR_MAX -}; - -#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) - -/** - * enum ovs_frag_type - IPv4 and IPv6 fragment type - * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. - * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. - * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. - * - * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct - * ovs_key_ipv6. - */ -enum ovs_frag_type { - OVS_FRAG_TYPE_NONE, - OVS_FRAG_TYPE_FIRST, - OVS_FRAG_TYPE_LATER, - __OVS_FRAG_TYPE_MAX -}; - -#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) - -struct ovs_key_ethernet { - __u8 eth_src[ETH_ALEN]; - __u8 eth_dst[ETH_ALEN]; -}; - -struct ovs_key_ipv4 { - __be32 ipv4_src; - __be32 ipv4_dst; - __u8 ipv4_proto; - __u8 ipv4_tos; - __u8 ipv4_ttl; - __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ -}; - -struct ovs_key_ipv6 { - __be32 ipv6_src[4]; - __be32 ipv6_dst[4]; - __be32 ipv6_label; /* 20-bits in least-significant bits. */ - __u8 ipv6_proto; - __u8 ipv6_tclass; - __u8 ipv6_hlimit; - __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ -}; - -struct ovs_key_tcp { - __be16 tcp_src; - __be16 tcp_dst; -}; - -struct ovs_key_udp { - __be16 udp_src; - __be16 udp_dst; -}; - -struct ovs_key_icmp { - __u8 icmp_type; - __u8 icmp_code; -}; - -struct ovs_key_icmpv6 { - __u8 icmpv6_type; - __u8 icmpv6_code; -}; - -struct ovs_key_arp { - __be32 arp_sip; - __be32 arp_tip; - __be16 arp_op; - __u8 arp_sha[ETH_ALEN]; - __u8 arp_tha[ETH_ALEN]; -}; - -struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[ETH_ALEN]; - __u8 nd_tll[ETH_ALEN]; -}; - -/** - * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. - * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow - * key. Always present in notifications. Required for all requests (except - * dumps). - * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying - * the actions to take for packets that match the key. Always present in - * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. - * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this - * flow. Present in notifications if the stats would be nonzero. Ignored in - * requests. - * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the - * TCP flags seen on packets in this flow. Only present in notifications for - * TCP flows, and only if it would be nonzero. Ignored in requests. - * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on - * the system monotonic clock, at which a packet was last processed for this - * flow. Only present in notifications if a packet has been processed for this - * flow. Ignored in requests. 
- * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the - * last-used time, accumulated TCP flags, and statistics for this flow. - * Otherwise ignored in requests. Never present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_FLOW_* commands. - */ -enum ovs_flow_attr { - OVS_FLOW_ATTR_UNSPEC, - OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ - OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ - OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ - OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ - OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ - __OVS_FLOW_ATTR_MAX -}; - -#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) - -/** - * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. - * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with - * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of - * %UINT32_MAX samples all packets and intermediate values sample intermediate - * fractions of packets. - * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. - * Actions are passed as nested attributes. - * - * Executes the specified actions with the given probability on a per-packet - * basis. - */ -enum ovs_sample_attr { - OVS_SAMPLE_ATTR_UNSPEC, - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - __OVS_SAMPLE_ATTR_MAX, -}; - -#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) - -/** - * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. - * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION - * message should be sent. Required. - * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is - * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. - */ -enum ovs_userspace_attr { - OVS_USERSPACE_ATTR_UNSPEC, - OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ - OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ - __OVS_USERSPACE_ATTR_MAX -}; - -#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) - -/** - * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. - * @vlan_tpid: Tag protocol identifier (TPID) to push. - * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set - * (but it will not be set in the 802.1Q header that is pushed). - * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. - */ -struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ - __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ -}; - -/** - * enum ovs_action_attr - Action types. - * - * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. - * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested - * %OVS_USERSPACE_ATTR_* attributes. - * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The - * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its - * value. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. 
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. - * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in - * the nested %OVS_SAMPLE_ATTR_* attributes. - * - * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all - * fields within a header are modifiable, e.g. the IPv4 protocol and fragment - * type may not be changed. - */ - -enum ovs_action_attr { - OVS_ACTION_ATTR_UNSPEC, - OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ - OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ - OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ - OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ - OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ - OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ - __OVS_ACTION_ATTR_MAX -}; - -#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) +#include #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5c8a1d25e21..d8fbc6aeac8 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -285,6 +285,7 @@ header-y += nvram.h header-y += omap3isp.h header-y += omapfb.h header-y += oom.h +header-y += openvswitch.h header-y += packet_diag.h header-y += param.h header-y += parport.h diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h new file mode 100644 index 00000000000..405918dd7b3 --- /dev/null +++ b/include/uapi/linux/openvswitch.h @@ -0,0 +1,456 @@ + +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _UAPI__LINUX_OPENVSWITCH_H +#define _UAPI__LINUX_OPENVSWITCH_H 1 + +#include +#include + +/** + * struct ovs_header - header for OVS Generic Netlink messages. + * @dp_ifindex: ifindex of local port for datapath (0 to make a request not + * specific to a datapath). + * + * Attributes following the header are specific to a particular OVS Generic + * Netlink family, but all of the OVS families use this header. + */ + +struct ovs_header { + int dp_ifindex; +}; + +/* Datapaths. */ + +#define OVS_DATAPATH_FAMILY "ovs_datapath" +#define OVS_DATAPATH_MCGROUP "ovs_datapath" +#define OVS_DATAPATH_VERSION 0x1 + +enum ovs_datapath_cmd { + OVS_DP_CMD_UNSPEC, + OVS_DP_CMD_NEW, + OVS_DP_CMD_DEL, + OVS_DP_CMD_GET, + OVS_DP_CMD_SET +}; + +/** + * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. + * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local + * port". This is the name of the network device whose dp_ifindex is given in + * the &struct ovs_header. Always present in notifications. Required in + * %OVS_DP_NEW requests. May be used as an alternative to specifying + * dp_ifindex in other requests (with a dp_ifindex of 0). + * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially + * set on the datapath port (for OVS_ACTION_ATTR_MISS). 
Only valid on + * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should + * not be sent. + * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the + * datapath. Always present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_DP_* commands. + */ +enum ovs_datapath_attr { + OVS_DP_ATTR_UNSPEC, + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + __OVS_DP_ATTR_MAX +}; + +#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) + +struct ovs_dp_stats { + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ + __u64 n_flows; /* Number of flows present */ +}; + +struct ovs_vport_stats { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ +}; + +/* Fixed logical ports. */ +#define OVSP_LOCAL ((__u32)0) + +/* Packet transfer. */ + +#define OVS_PACKET_FAMILY "ovs_packet" +#define OVS_PACKET_VERSION 0x1 + +enum ovs_packet_cmd { + OVS_PACKET_CMD_UNSPEC, + + /* Kernel-to-user notifications. */ + OVS_PACKET_CMD_MISS, /* Flow table miss. */ + OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ + + /* Userspace commands. */ + OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ +}; + +/** + * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. + * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire + * packet as received, from the start of the Ethernet header onward. For + * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by + * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is + * the flow key extracted from the packet as originally received. + * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key + * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows + * userspace to adapt its flow setup strategy by comparing its notion of the + * flow key against the kernel's. + * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used + * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content + * specified there. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_PACKET_* commands. + */ +enum ovs_packet_attr { + OVS_PACKET_ATTR_UNSPEC, + OVS_PACKET_ATTR_PACKET, /* Packet data. */ + OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ + OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + __OVS_PACKET_ATTR_MAX +}; + +#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) + +/* Virtual ports. 
*/ + +#define OVS_VPORT_FAMILY "ovs_vport" +#define OVS_VPORT_MCGROUP "ovs_vport" +#define OVS_VPORT_VERSION 0x1 + +enum ovs_vport_cmd { + OVS_VPORT_CMD_UNSPEC, + OVS_VPORT_CMD_NEW, + OVS_VPORT_CMD_DEL, + OVS_VPORT_CMD_GET, + OVS_VPORT_CMD_SET +}; + +enum ovs_vport_type { + OVS_VPORT_TYPE_UNSPEC, + OVS_VPORT_TYPE_NETDEV, /* network device */ + OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ + __OVS_VPORT_TYPE_MAX +}; + +#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) + +/** + * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. + * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. + * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type + * of vport. + * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device + * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes + * plus a null terminator. + * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. + * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that + * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on + * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for + * packets sent or received through the vport. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_VPORT_* commands. + * + * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and + * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is + * optional; if not specified a free port number is automatically selected. + * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type + * of vport. + * and other attributes are ignored. + * + * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to + * look up the vport to operate on; otherwise dp_idx from the &struct + * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. + */ +enum ovs_vport_attr { + OVS_VPORT_ATTR_UNSPEC, + OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ + OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ + OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ + OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ + OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + __OVS_VPORT_ATTR_MAX +}; + +#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) + +/* Flows. */ + +#define OVS_FLOW_FAMILY "ovs_flow" +#define OVS_FLOW_MCGROUP "ovs_flow" +#define OVS_FLOW_VERSION 0x1 + +enum ovs_flow_cmd { + OVS_FLOW_CMD_UNSPEC, + OVS_FLOW_CMD_NEW, + OVS_FLOW_CMD_DEL, + OVS_FLOW_CMD_GET, + OVS_FLOW_CMD_SET +}; + +struct ovs_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ +}; + +enum ovs_key_attr { + OVS_KEY_ATTR_UNSPEC, + OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ + OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ + OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ + OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ + OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ + OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ + OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ + OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ + OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ + OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ + OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ + OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ + OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ + OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ + OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ + __OVS_KEY_ATTR_MAX +}; + +#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) + +/** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. + * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. + * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. + * + * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct + * ovs_key_ipv6. + */ +enum ovs_frag_type { + OVS_FRAG_TYPE_NONE, + OVS_FRAG_TYPE_FIRST, + OVS_FRAG_TYPE_LATER, + __OVS_FRAG_TYPE_MAX +}; + +#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) + +struct ovs_key_ethernet { + __u8 eth_src[ETH_ALEN]; + __u8 eth_dst[ETH_ALEN]; +}; + +struct ovs_key_ipv4 { + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_proto; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_ipv6 { + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be32 ipv6_label; /* 20-bits in least-significant bits. */ + __u8 ipv6_proto; + __u8 ipv6_tclass; + __u8 ipv6_hlimit; + __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_tcp { + __be16 tcp_src; + __be16 tcp_dst; +}; + +struct ovs_key_udp { + __be16 udp_src; + __be16 udp_dst; +}; + +struct ovs_key_icmp { + __u8 icmp_type; + __u8 icmp_code; +}; + +struct ovs_key_icmpv6 { + __u8 icmpv6_type; + __u8 icmpv6_code; +}; + +struct ovs_key_arp { + __be32 arp_sip; + __be32 arp_tip; + __be16 arp_op; + __u8 arp_sha[ETH_ALEN]; + __u8 arp_tha[ETH_ALEN]; +}; + +struct ovs_key_nd { + __u32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; +}; + +/** + * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. + * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow + * key. Always present in notifications. Required for all requests (except + * dumps). + * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying + * the actions to take for packets that match the key. Always present in + * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for + * %OVS_FLOW_CMD_SET requests. + * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this + * flow. Present in notifications if the stats would be nonzero. Ignored in + * requests. + * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the + * TCP flags seen on packets in this flow. Only present in notifications for + * TCP flows, and only if it would be nonzero. Ignored in requests. + * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on + * the system monotonic clock, at which a packet was last processed for this + * flow. Only present in notifications if a packet has been processed for this + * flow. Ignored in requests. 
+ * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the + * last-used time, accumulated TCP flags, and statistics for this flow. + * Otherwise ignored in requests. Never present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_FLOW_* commands. + */ +enum ovs_flow_attr { + OVS_FLOW_ATTR_UNSPEC, + OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ + OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ + OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ + OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ + __OVS_FLOW_ATTR_MAX +}; + +#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) + +/** + * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. + * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with + * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets and intermediate values sample intermediate + * fractions of packets. + * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. + * Actions are passed as nested attributes. + * + * Executes the specified actions with the given probability on a per-packet + * basis. + */ +enum ovs_sample_attr { + OVS_SAMPLE_ATTR_UNSPEC, + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + __OVS_SAMPLE_ATTR_MAX, +}; + +#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) + +/** + * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. + * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION + * message should be sent. Required. + * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is + * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + */ +enum ovs_userspace_attr { + OVS_USERSPACE_ATTR_UNSPEC, + OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ + OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ + __OVS_USERSPACE_ATTR_MAX +}; + +#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) + +/** + * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. + * @vlan_tpid: Tag protocol identifier (TPID) to push. + * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set + * (but it will not be set in the 802.1Q header that is pushed). + * + * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID + * values are those that the kernel module also parses as 802.1Q headers, to + * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN + * from having surprising results. + */ +struct ovs_action_push_vlan { + __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ +}; + +/** + * enum ovs_action_attr - Action types. + * + * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested + * %OVS_USERSPACE_ATTR_* attributes. + * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The + * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its + * value. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the + * packet. 
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in + * the nested %OVS_SAMPLE_ATTR_* attributes. + * + * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all + * fields within a header are modifiable, e.g. the IPv4 protocol and fragment + * type may not be changed. + */ + +enum ovs_action_attr { + OVS_ACTION_ATTR_UNSPEC, + OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ + OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ + OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ + OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ + OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ + OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + __OVS_ACTION_ATTR_MAX +}; + +#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) + +#endif /* _LINUX_OPENVSWITCH_H */ -- cgit v1.2.3-70-g09d2 From a691ce7fe451363d2f1fa48d30c8f4b87c2475d4 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 28 Mar 2013 15:24:53 +0000 Subject: include/linux: printk is needed in filter.h when CONFIG_BPF_JIT is defined for make V=1 EXTRA_CFLAGS=-W ARCH=arm allmodconfig printk is need when CONFIG_BPF_JIT is defined or it will report pr_err and print_hex_dump are implicit declaration Signed-off-by: Chen Gang Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index d7d25083130..d1248f401a5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -48,6 +48,9 @@ extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); #ifdef CONFIG_BPF_JIT +#include +#include + extern void bpf_jit_compile(struct sk_filter *fp); extern void bpf_jit_free(struct sk_filter *fp); -- cgit v1.2.3-70-g09d2 From 4c3d5e7b41dda1b1372bfc2545ef092a1bc5ad33 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 30 Mar 2013 06:31:03 +0000 Subject: net: reorder some fields of net_device As time passed, some fields were added in net_device, and not at sensible offsets. Lets reorder some fields to reduce number of cache lines in RX path. Fields not used in data path should be moved out of this critical cache line. In particular, move broadcast[] to the end of the rx section, as it is less used, and ethernet uses only the beginning of the 32bytes field. Before patch : offsetof(struct net_device,dev_addr)=0x258 offsetof(struct net_device,rx_handler)=0x2b8 offsetof(struct net_device,ingress_queue)=0x2c8 offsetof(struct net_device,broadcast)=0x278 After : offsetof(struct net_device,dev_addr)=0x280 offsetof(struct net_device,rx_handler)=0x298 offsetof(struct net_device,ingress_queue)=0x2a8 offsetof(struct net_device,broadcast)=0x2b0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1dbb02c9894..4491414a921 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1071,6 +1071,8 @@ struct net_device { struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; + struct list_head upper_dev_list; /* List of upper devices */ + /* currently active device features */ netdev_features_t features; @@ -1143,6 +1145,13 @@ struct net_device { spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ + struct netdev_hw_addr_list dev_addrs; /* list of device + * hw addresses + */ +#ifdef CONFIG_SYSFS + struct kset *queues_kset; +#endif + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1175,21 +1184,11 @@ struct net_device { * avoid dirtying this cache line. */ - struct list_head upper_dev_list; /* List of upper devices */ - /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are unicast) */ - struct netdev_hw_addr_list dev_addrs; /* list of device - hw addresses */ - - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - -#ifdef CONFIG_SYSFS - struct kset *queues_kset; -#endif #ifdef CONFIG_RPS struct netdev_rx_queue *_rx; @@ -1200,18 +1199,14 @@ struct net_device { /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; -#ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. This must only be - * set if the ndo_rx_flow_steer operation is defined. */ - struct cpu_rmap *rx_cpu_rmap; -#endif #endif rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + /* * Cache lines mostly used on transmit path @@ -1233,6 +1228,12 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_RFS_ACCEL + /* CPU reverse-mapping for RX completion interrupts, indexed + * by RX queue number. Assigned by driver. This must only be + * set if the ndo_rx_flow_steer operation is defined. */ + struct cpu_rmap *rx_cpu_rmap; +#endif /* These may be needed for future network-power-down code. */ -- cgit v1.2.3-70-g09d2 From 7d4c04fc170087119727119074e72445f2bb192b Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Thu, 28 Mar 2013 11:19:25 +0000 Subject: net: add option to enable error queue packets waking select Currently, when a socket receives something on the error queue it only wakes up the socket on select if it is in the "read" list, that is the socket has something to read. It is useful also to wake the socket if it is in the error list, which would enable software to wait on error queue packets without waking up for regular data on the socket. The main use case is for receiving timestamped transmit packets which return the timestamp to the socket via the error queue. This enables an application to select on the socket for the error queue only instead of for the regular traffic. 
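As a rough userspace sketch of the intended usage (not part of the patch), an application could enable the option and then poll for POLLPRI alone; wait_for_errqueue is a hypothetical helper, and the option value shown is the asm-generic number introduced by this patch.

/*
 * Hypothetical userspace helper, for illustration only.  SO_SELECT_ERR_QUEUE
 * is 45 in the asm-generic numbering added by this patch (several
 * architectures use different values; see the per-arch socket.h hunks).
 * With the flag set, a non-empty error queue is reported as POLLPRI, so a
 * poller interested only in error-queue packets (e.g. TX timestamps) need
 * not wake up for ordinary data.
 */
#include <poll.h>
#include <sys/socket.h>

#ifndef SO_SELECT_ERR_QUEUE
#define SO_SELECT_ERR_QUEUE 45		/* asm-generic value from this patch */
#endif

static int wait_for_errqueue(int fd, int timeout_ms)
{
	int one = 1;
	struct pollfd pfd = { .fd = fd, .events = POLLPRI };

	if (setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE, &one, sizeof(one)) < 0)
		return -1;

	/* Returns > 0 once the error queue has something to read. */
	return poll(&pfd, 1, timeout_ms);
}
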
-v2- * Added the SO_SELECT_ERR_QUEUE socket option to every architechture specific file * Modified every socket poll function that checks error queue Signed-off-by: Jacob Keller Cc: Jeffrey Kirsher Cc: Richard Cochran Cc: Matthew Vick Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/cris/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/h8300/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 1 + include/uapi/asm-generic/socket.h | 2 ++ net/bluetooth/af_bluetooth.c | 3 ++- net/core/datagram.c | 4 +++- net/core/sock.c | 8 ++++++++ net/iucv/af_iucv.c | 3 ++- net/nfc/llcp/sock.c | 3 ++- net/sctp/socket.c | 3 ++- net/unix/af_unix.c | 4 +++- 23 files changed, 53 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index c5195524d1e..eee6ea76bda 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 51c6401582e..37401f53512 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 50692b738c7..ba409c9947b 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -74,6 +74,8 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 595391f0f98..31dbb5d8e13 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -72,5 +72,7 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index 43e32621da7..5d1c6d0870e 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index c567adc8bea..6b4329f18b2 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -81,4 +81,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 519afa2755d..2a3b59e0e17 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 
47132f44c95..3b211507be7 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -90,4 +90,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 5c7c7c98854..b4ce844c939 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 526e4b9aece..70c512a386f 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -71,6 +71,8 @@ #define SO_LOCK_FILTER 0x4025 +#define SO_SELECT_ERR_QUEUE 0x4026 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a26dcaece50..a36daf3c6f9 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index f99eea7fff0..2dacb306835 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -78,4 +78,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index cbbad74b2e0..89f49b68a21 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -68,6 +68,8 @@ #define SO_LOCK_FILTER 0x0028 +#define SO_SELECT_ERR_QUEUE 0x0029 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 35905cb6e41..a8f44f50e65 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -83,4 +83,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 14f6e9d19dc..08f05f96473 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -667,6 +667,7 @@ enum sock_flags { * user-space instead. 
*/ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ + SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 4ef3acbba5d..c5d2e3a1cf6 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -74,4 +74,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a7..409902f892f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -422,7 +422,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc..36da5b66351 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/sock.c b/net/core/sock.c index a19e728d5fb..2ff5f3619a8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe428..f0550a38f29 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index f1b377e247f..2d55e8a4595 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -521,7 +521,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9070736b8d..dd21ae3013d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6185,7 +6185,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a..fb7a63ff71a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) -- cgit v1.2.3-70-g09d2 From 932bc4d7a53ba418de67fdab533248df5b36c752 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:57:58 +0200 Subject: net: add skb_dst_set_noref_force Rename skb_dst_set_noref to __skb_dst_set_noref and add force flag as suggested by David Miller. The new wrapper skb_dst_set_noref_force will force dst entries that are not cached to be attached as skb dst without taking reference as long as provided dst is reclaimed after RCU grace period. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Acked-by: David S. Miller Signed-off-by: Simon Horman --- include/linux/skbuff.h | 35 ++++++++++++++++++++++++++++++++++- net/core/dst.c | 9 +++++---- 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 878e0ee8106..364e2440a7e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst); +extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, + bool force); + +/** + * skb_dst_set_noref - sets skb dst, hopefully, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * If dst entry is cached, we do not take reference and dst_release + * will be avoided by refdst_drop. If dst entry is not cached, we take + * reference, so that last dst_release can destroy the dst immediately. + */ +static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, false); +} + +/** + * skb_dst_set_noref_force - sets skb dst, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * No reference is taken and no dst_release will be called. While for + * cached dsts deferred reclaim is a basic feature, for entries that are + * not cached it is caller's job to guarantee that last dst_release for + * provided dst happens when nobody uses it, eg. after a RCU grace period. 
+ */ +static inline void skb_dst_set_noref_force(struct sk_buff *skb, + struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, true); +} /** * skb_dst_is_noref - Test if skb dst isn't refcounted diff --git a/net/core/dst.c b/net/core/dst.c index 35fd12f1a69..df9cc810ec8 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) EXPORT_SYMBOL(__dst_destroy_metrics_generic); /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + if (unlikely((dst->flags & DST_NOCACHE) && !force)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref); /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat -- cgit v1.2.3-70-g09d2 From c90558dae51cef334f3d9d447cf7c0fd1bfe725d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:57:59 +0200 Subject: ipvs: avoid routing by TOS for real server Avoid replacing the cached route for real server on every packet with different TOS. I doubt that routing by TOS for real server is used at all, so we should be better with such optimization. 
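A rough sketch of the effect, for illustration only (the function below is a simplified stand-in, not code from this patch): after the change the cached per-destination route is invalidated solely when it becomes obsolete, so packets that differ only in TOS keep reusing the same dst instead of forcing a fresh route lookup each time.

	/* Simplified sketch of the dst-cache check after this change.
	 * Field and helper names follow the kernel code in the diff below;
	 * the commented-out line shows the old TOS-sensitive test.
	 */
	static struct dst_entry *dst_check_sketch(struct ip_vs_dest *dest)
	{
		struct dst_entry *dst = dest->dst_cache;

		if (!dst)
			return NULL;
		/* old: if (dst->obsolete || rtos != dest->dst_rtos) ... */
		if (dst->obsolete &&
		    dst->ops->check(dst, dest->dst_cookie) == NULL) {
			dest->dst_cache = NULL;	/* drop stale entry */
			dst_release(dst);
			return NULL;		/* caller does a fresh lookup */
		}
		dst_hold(dst);
		return dst;
	}

The trade-off is that all TOS values share one cached route per real server, which is what the changelog argues is acceptable in practice.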
Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 - net/netfilter/ipvs/ip_vs_xmit.c | 58 ++++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index bee87badabe..64db11769cc 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -753,7 +753,6 @@ struct ip_vs_dest { /* for destination cache */ spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ - u32 dst_rtos; /* RT_TOS(tos) for dst */ u32 dst_cookie; union nf_inet_addr dst_saddr; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec..4b0bd15ad7c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -57,27 +57,24 @@ enum { * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, - u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) { struct dst_entry *old_dst; old_dst = dest->dst_cache; dest->dst_cache = dst; - dest->dst_rtos = rtos; dest->dst_cookie = dst_cookie; dst_release(old_dst); } static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +__ip_vs_dst_check(struct ip_vs_dest *dest) { struct dst_entry *dst = dest->dst_cache; if (!dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && - dst->ops->check(dst, dest->dst_cookie) == NULL) { + if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); return NULL; @@ -104,7 +101,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - u32 rtos, int rt_mode, __be32 *saddr) + int rt_mode, __be32 *saddr) { struct flowi4 fl4; struct rtable *rt; @@ -113,7 +110,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; - fl4.flowi4_tos = rtos; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 
FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +120,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, rtos, daddr, 0); + flowi4_update_output(&fl4, 0, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +128,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); loop++; goto retry; } @@ -143,7 +139,7 @@ retry: /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) + __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); struct rtable *rt; /* Route to the other host */ @@ -152,19 +148,18 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (dest) { spin_lock(&dest->dst_lock); - if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos))) { - rt = do_output_route4(net, dest->addr.ip, rtos, - rt_mode, &dest->dst_saddr.ip); + rt = (struct rtable *) __ip_vs_dst_check(dest); + if (!rt) { + rt = do_output_route4(net, dest->addr.ip, rt_mode, + &dest->dst_saddr.ip); if (!rt) { spin_unlock(&dest->dst_lock); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " - "rtos=%X\n", + __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) @@ -177,7 +172,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) return NULL; if (ret_saddr) @@ -307,7 +302,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (dest) { spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); + rt = (struct rt6_info *)__ip_vs_dst_check(dest); if (!rt) { u32 cookie; @@ -320,7 +315,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); @@ -449,8 +444,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL, NULL))) + rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, + NULL); + if (!rt) goto tx_error_icmp; /* MTU checking */ @@ -581,10 +577,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL))) + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* @@ -832,10 +827,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_CONNECT, - &saddr))) + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -1067,7 +1061,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL))) @@ -1223,7 +1216,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), rt_mode, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; -- cgit v1.2.3-70-g09d2 From d1deae4d3ab37d833f278fec975a8f2ddeb78f3b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:02 +0200 Subject: ipvs: rename functions related to dst_cache reset Move and give better names to two functions: - ip_vs_dst_reset to __ip_vs_dst_cache_reset - __ip_vs_dev_reset to ip_vs_forget_dev Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 - net/netfilter/ipvs/ip_vs_ctl.c | 34 ++++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_xmit.c | 14 -------------- 3 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 64db11769cc..8ad73a83465 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1427,7 +1427,6 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *iph); -extern void ip_vs_dst_reset(struct ip_vs_dest *dest); #ifdef CONFIG_IP_VS_IPV6 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6b55ba69c80..5265eaa7b42 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -641,6 +641,17 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, return dest; } +/* Release dst_cache for dest in user context */ +static void 
__ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ + struct dst_entry *old_dst; + + old_dst = dest->dst_cache; + dest->dst_cache = NULL; + dst_release(old_dst); + dest->dst_saddr.ip = 0; +} + /* * Lookup dest by {svc,addr,port} in the destination trash. * The destination trash is used to hold the destinations that are removed @@ -690,7 +701,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); list_del(&dest->n_list); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); kfree(dest); @@ -717,7 +728,7 @@ static void ip_vs_trash_cleanup(struct net *net) list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { list_del(&dest->n_list); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); kfree(dest); @@ -811,7 +822,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->l_threshold = udest->l_threshold; spin_lock_bh(&dest->dst_lock); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); if (add) @@ -1037,7 +1048,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) dest->vfwmark, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); /* simply decrease svc->refcnt here, let the caller check and release the service if nobody refers to it. Only user context can release destination and service, @@ -1496,11 +1507,10 @@ void ip_vs_service_net_cleanup(struct net *net) mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device (dst_cache) */ static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); if (dest->dst_cache && dest->dst_cache->dev == dev) { @@ -1509,7 +1519,7 @@ __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); @@ -1537,7 +1547,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } @@ -1546,7 +1556,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } @@ -1554,7 +1564,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } list_for_each_entry(dest, &ipvs->dest_trash, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 7cd7c61692a..6448a2e862d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -362,20 +362,6 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, #endif -/* - * Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) -{ - struct dst_entry *old_dst; - - old_dst = dest->dst_cache; - 
dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; -} - /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, struct ip_vs_conn *cp) -- cgit v1.2.3-70-g09d2 From 026ace060dfe29275d2188297a62fa37d6c1a02c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:06 +0200 Subject: ipvs: optimize dst usage for real server Currently when forwarding requests to real servers we use dst_lock and atomic operations when cloning the dst_cache value. As the dst_cache value does not change most of the time it is better to use RCU and to lock dst_lock only when we need to replace the obsoleted dst. For this to work we keep dst_cache in new structure protected by RCU. For packets to remote real servers we will use noref version of dst_cache, it will be valid while we are in RCU read-side critical section because now dst_release for replaced dsts will be invoked after the grace period. Packets to local real servers that are passed to local stack with NF_ACCEPT need a dst clone. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++- net/netfilter/ipvs/ip_vs_core.c | 11 ++- net/netfilter/ipvs/ip_vs_ctl.c | 25 ++++-- net/netfilter/ipvs/ip_vs_xmit.c | 188 ++++++++++++++++++++++++++++++---------- 4 files changed, 177 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8ad73a83465..a150ff5d838 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -724,6 +724,13 @@ struct ip_vs_service { struct ip_vs_pe *pe; }; +/* Information for cached dst */ +struct ip_vs_dest_dst { + struct dst_entry *dst_cache; /* destination cache entry */ + u32 dst_cookie; + union nf_inet_addr dst_saddr; + struct rcu_head rcu_head; +}; /* * The real server destination forwarding entry @@ -752,9 +759,7 @@ struct ip_vs_dest { /* for destination cache */ spinlock_t dst_lock; /* lock of dst_cache */ - struct dst_entry *dst_cache; /* destination cache entry */ - u32 dst_cookie; - union nf_inet_addr dst_saddr; + struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ @@ -1427,6 +1432,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *iph); +extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head); #ifdef CONFIG_IP_VS_IPV6 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2aef23ed748..6ad24e7ef44 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1395,10 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { - spin_lock(&dest->dst_lock); - if (dest->dst_cache) - mtu = dst_mtu(dest->dst_cache); - spin_unlock(&dest->dst_lock); + struct ip_vs_dest_dst *dest_dst; + + rcu_read_lock(); + dest_dst = rcu_dereference(dest->dest_dst); + if (dest_dst) + mtu = dst_mtu(dest_dst->dst_cache); + rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5265eaa7b42..ef48cc55f77 100644 --- 
a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -641,15 +641,26 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, return dest; } -/* Release dst_cache for dest in user context */ +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_dst *dest_dst = container_of(head, + struct ip_vs_dest_dst, + rcu_head); + + dst_release(dest_dst->dst_cache); + kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; + old = rcu_dereference_protected(dest->dest_dst, 1); + if (old) { + RCU_INIT_POINTER(dest->dest_dst, NULL); + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); + } } /* @@ -1513,7 +1524,7 @@ static inline void ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); - if (dest->dst_cache && dest->dst_cache->dev == dev) { + if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) { IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 603eb8a50e1..3db7889edef 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@ * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet @@ -54,34 +56,51 @@ enum { IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ + kfree(dest_dst); +} + /* * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, + struct dst_entry *dst, u32 dst_cookie) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, + lockdep_is_held(&dest->dst_lock)); + + if (dest_dst) { + dest_dst->dst_cache = dst; + dest_dst->dst_cookie = dst_cookie; + } + rcu_assign_pointer(dest->dest_dst, dest_dst); - old_dst = dest->dst_cache; - dest->dst_cache = dst; - dest->dst_cookie = dst_cookie; - dst_release(old_dst); + if (old) + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } -static inline struct dst_entry * +static inline struct ip_vs_dest_dst * __ip_vs_dst_check(struct ip_vs_dest *dest) { - struct dst_entry *dst = dest->dst_cache; + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); + struct dst_entry *dst; - if (!dst) + if (!dest_dst) return NULL; - if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { - dest->dst_cache = NULL; - dst_release(dst); + dst = dest_dst->dst_cache; + if (dst->obsolete && + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; - } - dst_hold(dst); - return dst; + return dest_dst; } static inline bool 
@@ -144,35 +163,48 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, { struct net *net = dev_net(skb_dst(skb)->dev); struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ struct iphdr *iph; __be16 df; int mtu; - int local; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rtable *) __ip_vs_dst_check(dest); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rtable *) dest_dst->dst_cache; + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock(&dest->dst_lock); + goto err_unreach; + } rt = do_output_route4(net, dest->addr.ip, rt_mode, - &dest->dst_saddr.ip); + &dest_dst->dst_saddr.ip); if (!rt) { + __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + spin_unlock(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", - &dest->addr.ip, &dest->dst_saddr.ip, + &dest->addr.ip, &dest_dst->dst_saddr.ip, atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) - *ret_saddr = dest->dst_saddr.ip; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); + noref = 0; + /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ @@ -210,7 +242,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, goto err_put; } /* skb to local stack, preserve old route */ - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return local; } @@ -240,12 +273,19 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); return local; err_put: - ip_rt_put(rt); + if (!noref) + ip_rt_put(rt); return -1; err_unreach: @@ -303,36 +343,48 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); + struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct rt6_info *ort; /* Original route */ struct dst_entry *dst; int mtu; - int local; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rt6_info *) dest_dst->dst_cache; + else { u32 cookie; + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock(&dest->dst_lock); + goto err_unreach; + } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr.in6, + &dest_dst->dst_saddr.in6, do_xfrm); if (!dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); goto err_unreach; } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr.in6, + &dest->addr.in6, &dest_dst->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - *ret_saddr = dest->dst_saddr.in6; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.in6; } else { + noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) goto err_unreach; @@ -367,7 +419,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, goto err_put; } /* skb to local stack, preserve old route */ - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return local; } @@ -399,12 +452,19 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); return local; err_put: - dst_release(&rt->dst); + if (!noref) + dst_release(&rt->dst); return -1; err_unreach: @@ -494,6 +554,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL) < 0) goto tx_error; @@ -504,12 +565,14 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -521,6 +584,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, { EnterFunction(10); + rcu_read_lock(); if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -529,12 +593,14 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -553,6 +619,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; @@ -620,12 +687,14 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); return rc; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -640,6 +709,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; @@ -707,6 +777,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); return rc; @@ -714,6 +785,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: LeaveFunction(10); kfree_skb(skb); + rcu_read_unlock(); return NF_STOLEN; } #endif @@ -755,6 +827,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + 
rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -762,8 +835,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL, &saddr); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); + } rt = skb_rtable(skb); tdev = rt->dst.dev; @@ -818,6 +893,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); @@ -825,6 +901,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -844,6 +921,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | @@ -851,8 +929,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); + } rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; @@ -901,6 +981,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); @@ -908,6 +989,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -926,14 +1008,17 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); + } ip_send_check(ip_hdr(skb)); @@ -941,12 +1026,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -960,25 +1047,30 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL); if (local < 0) goto tx_error; - if (local) + if (local) { + rcu_read_unlock(); return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); + } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1023,6 +1115,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? 
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; + rcu_read_lock(); local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); if (local < 0) goto tx_error; @@ -1067,10 +1160,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); goto out; tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); @@ -1111,6 +1206,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; + rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) @@ -1156,10 +1252,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); goto out; tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); -- cgit v1.2.3-70-g09d2 From 363c97d7435ebba8a040f86e29bdec79ee182f0c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:07 +0200 Subject: ipvs: convert app locks We use locks like tcp_app_lock, udp_app_lock, sctp_app_lock to protect access to the protocol hash tables from readers in packet context while the application instances (inc) are [un]registered under global mutex. As the hash tables are mostly read when conns are created and bound to app, use RCU for readers and reclaim app instance after grace period. Simplify ip_vs_app_inc_get because we use usecnt only for statistics and rely on module refcounting. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 +--- net/netfilter/ipvs/ip_vs_app.c | 27 +++++++++++++++++++-------- net/netfilter/ipvs/ip_vs_ftp.c | 2 ++ net/netfilter/ipvs/ip_vs_proto_sctp.c | 18 ++++++------------ net/netfilter/ipvs/ip_vs_proto_tcp.c | 18 ++++++------------ net/netfilter/ipvs/ip_vs_proto_udp.c | 19 ++++++------------- 6 files changed, 40 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a150ff5d838..84ca17141a4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -823,6 +823,7 @@ struct ip_vs_app { struct ip_vs_app *app; /* its real application */ __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ + struct rcu_head rcu_head; /* * output hook: Process packet in inout direction, diff set for TCP. 
@@ -908,7 +909,6 @@ struct netns_ipvs { #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) struct list_head tcp_apps[TCP_APP_TAB_SIZE]; - spinlock_t tcp_app_lock; #endif /* ip_vs_proto_udp */ #ifdef CONFIG_IP_VS_PROTO_UDP @@ -916,7 +916,6 @@ struct netns_ipvs { #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) struct list_head udp_apps[UDP_APP_TAB_SIZE]; - spinlock_t udp_app_lock; #endif /* ip_vs_proto_sctp */ #ifdef CONFIG_IP_VS_PROTO_SCTP @@ -925,7 +924,6 @@ struct netns_ipvs { #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) /* Hash table for SCTP application incarnations */ struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; - spinlock_t sctp_app_lock; #endif /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0b779d7df88..a95603004bc 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) module_put(app->module); } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ + kfree(inc->timeout_table); + kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ + struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + + ip_vs_app_inc_destroy(inc); +} /* * Allocate/initialize app incarnation and register it in proto apps. @@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, return 0; out: - kfree(inc->timeout_table); - kfree(inc); + ip_vs_app_inc_destroy(inc); return ret; } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) list_del(&inc->a_list); - kfree(inc->timeout_table); - kfree(inc); + call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; - atomic_inc(&inc->usecnt); - if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) - atomic_dec(&inc->usecnt); + result = ip_vs_app_get(inc->app); + if (result) + atomic_inc(&inc->usecnt); return result; } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { - ip_vs_app_put(inc->app); atomic_dec(&inc->usecnt); + ip_vs_app_put(inc->app); } @@ -218,6 +228,7 @@ out_unlock: /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services + * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f0443..7f90825ef1c 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -480,6 +480,7 @@ static int __init ip_vs_ftp_init(void) int rv; rv = register_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns on error */ return rv; } @@ -489,6 +490,7 @@ static int __init ip_vs_ftp_init(void) static void __exit ip_vs_ftp_exit(void) { unregister_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns */ } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index cd1d7298f7b..f7190cdf023 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) hash = sctp_app_hashkey(port); - spin_lock_bh(&ipvs->sctp_app_lock); 
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); - spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->sctp_app_lock); + list_del_rcu(&inc->p_list); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&ipvs->sctp_app_lock); - list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); out: return result; } @@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a7582..0bbc3feae68 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) hash = tcp_app_hashkey(port); - spin_lock_bh(&ipvs->tcp_app_lock); list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->tcp_app_lock); + list_del_rcu(&inc->p_list); } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&ipvs->tcp_app_lock); - list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->tcp_app_lock); + 
rcu_read_unlock(); out: return result; @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d..1a03e2d9c6b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) hash = udp_app_hashkey(port); - - spin_lock_bh(&ipvs->udp_app_lock); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->udp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -380,12 +377,9 @@ static void udp_unregister_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(net); - spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->udp_app_lock); + list_del_rcu(&inc->p_list); } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&ipvs->udp_app_lock); - list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); out: return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); - spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) -- cgit v1.2.3-70-g09d2 From 276472eae063d717b775fdfc87529937402d0e08 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:08 +0200 Subject: ipvs: remove rs_lock by using RCU rs_lock was used to protect rs_table (hash table) from updaters (under global mutex) and readers (packet handlers). We can remove rs_lock by using RCU lock for readers. Reclaiming dest only with kfree_rcu is enough because the readers access only fields from the ip_vs_dest structure. Use hlist for rs_table. As we are now using hlist_del_rcu, introduce in_rs_table flag as replacement for the list_empty checks which do not work with RCU. It is needed because only NAT dests are in the rs_table. 
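For illustration only (simplified stand-ins, not code from this patch): once nodes are removed with hlist_del_rcu(), the node cannot be re-initialised while readers may still traverse it, so the old list_empty()-style "am I linked?" test stops being reliable; membership is instead tracked with the explicit in_rs_table flag, and the structure itself is reclaimed only after a grace period via kfree_rcu().

	/* Simplified hash/unhash sketch following the diff below. */
	static void rs_hash_sketch(struct netns_ipvs *ipvs,
				   struct ip_vs_dest *dest, unsigned int hash)
	{
		if (dest->in_rs_table)
			return;				/* already linked */
		hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
		dest->in_rs_table = 1;
	}

	static void rs_unhash_sketch(struct ip_vs_dest *dest)
	{
		if (dest->in_rs_table) {
			hlist_del_rcu(&dest->d_list);	/* readers may still walk it */
			dest->in_rs_table = 0;
		}
		/* the struct is freed later via kfree_rcu(dest, rcu_head) */
	}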
Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 14 ++++--- net/netfilter/ipvs/ip_vs_core.c | 5 +-- net/netfilter/ipvs/ip_vs_ctl.c | 88 +++++++++++++++-------------------------- 3 files changed, 41 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 84ca17141a4..b06aa6c939f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -738,7 +738,7 @@ struct ip_vs_dest_dst { */ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ - struct list_head d_list; /* for table with all the dests */ + struct hlist_node d_list; /* for table with all the dests */ u16 af; /* address family */ __be16 port; /* port number of the server */ @@ -767,6 +767,9 @@ struct ip_vs_dest { __be16 vport; /* virtual port number */ union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + + struct rcu_head rcu_head; + unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -897,7 +900,7 @@ struct netns_ipvs { #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - struct list_head rs_table[IP_VS_RTAB_SIZE]; + struct hlist_head rs_table[IP_VS_RTAB_SIZE]; /* ip_vs_app */ struct list_head app_list; /* ip_vs_proto */ @@ -933,7 +936,6 @@ struct netns_ipvs { int num_services; /* no of virtual services */ - rwlock_t rs_lock; /* real services table */ /* Trash for destinations */ struct list_head dest_trash; /* Service counters */ @@ -1376,9 +1378,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) atomic_dec(&svc->usecnt); } -extern struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, __be16 dport); +extern bool +ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6ad24e7ef44..4fc749c6ec2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(net, af, iph.protocol, - &iph.saddr, - pptr[0])) { + if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ef48cc55f77..182d95807bd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, & IP_VS_RTAB_MASK; } -/* - * Hashes ip_vs_dest in rs_table by . - * should be called with locked tables. - */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by . 
*/ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned int hash; - if (!list_empty(&dest->d_list)) { - return 0; - } + if (dest->in_rs_table) + return; /* * Hash by proto,addr,port, @@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ipvs->rs_table[hash]); - - return 1; + hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); + dest->in_rs_table = 1; } -/* - * UNhashes ip_vs_dest from rs_table. - * should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* * Remove it from the rs_table table. */ - if (!list_empty(&dest->d_list)) { - list_del_init(&dest->d_list); + if (dest->in_rs_table) { + hlist_del_rcu(&dest->d_list); + dest->in_rs_table = 0; } - - return 1; } -/* - * Lookup real service by in the real service table. - */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, - __be16 dport) +/* Check if real service by is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; - /* - * Check for "full" addressed entries - * Return the first found entry - */ + /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&ipvs->rs_lock); - list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { - if ((dest->af == af) - && ip_vs_addr_equal(af, &dest->addr, daddr) - && (dest->port == dport) - && ((dest->protocol == protocol) || - dest->vfwmark)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - read_unlock(&ipvs->rs_lock); - return dest; + rcu_read_unlock(); + return true; } } - read_unlock(&ipvs->rs_lock); + rcu_read_unlock(); - return NULL; + return false; } /* @@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. */ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } } @@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net) __ip_vs_dst_cache_reset(dest); __ip_vs_unbind_svc(dest); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } } @@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! 
*/ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->persistconns, 0); atomic_set(&dest->refcnt, 1); - INIT_LIST_HEAD(&dest->d_list); + INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); @@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) time, so the operation here is OK */ atomic_dec(&dest->svc->refcnt); free_percpu(dest->stats.cpustats); - kfree(dest); + kfree_rcu(dest, rcu_head); } else { IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " "dest->refcnt=%d\n", @@ -3811,11 +3787,9 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - rwlock_init(&ipvs->rs_lock); - /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); atomic_set(&ipvs->ftpsvc_counter, 0); @@ -3892,7 +3866,7 @@ int __init ip_vs_control_init(void) EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); -- cgit v1.2.3-70-g09d2 From 088339a57d6042a8a19a3d5794594b558cd7b624 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:10 +0200 Subject: ipvs: convert connection locking Convert __ip_vs_conntbl_lock_array as follows: - readers that do not modify conn lists will use RCU lock - updaters that modify lists will use spinlock_t Now for conn lookups we will use RCU read-side critical section. Without using __ip_vs_conn_get such places have access to connection fields and can dereference some pointers like pe and pe_data plus the ability to update timer expiration. If full access is required we contend for reference. We add barrier in __ip_vs_conn_put, so that other CPUs see the refcnt operation after other writes. With the introduction of ip_vs_conn_unlink() we try to reorganize ip_vs_conn_expire(), so that unhashing of connections that should stay more time is avoided, even if it is for very short time. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 +++ net/netfilter/ipvs/ip_vs_conn.c | 230 +++++++++++++++++++++------------------- 2 files changed, 134 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b06aa6c939f..5700b07b518 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -620,6 +620,8 @@ struct ip_vs_conn { const struct ip_vs_pe *pe; char *pe_data; __u8 pe_data_len; + + struct rcu_head rcu_head; }; /* @@ -1185,9 +1187,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, int inverse); +/* Get reference to gain full access to conn. 
+ * By default, RCU read-side critical sections have access only to + * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference. + */ +static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp) +{ + return atomic_inc_not_zero(&cp->refcnt); +} + /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { + smp_mb__before_atomic_dec(); atomic_dec(&cp->refcnt); } extern void ip_vs_conn_put(struct ip_vs_conn *cp); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 704e514e02a..b0cd2be01d7 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly; struct ip_vs_aligned_lock { - rwlock_t l; + spinlock_t l; } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ - read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ - read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - static inline void ct_write_lock(unsigned int key) { - write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } static inline void ct_write_unlock(unsigned int key) { - write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ - read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ - read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock_bh(unsigned int key) -{ - write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock_bh(unsigned int key) -{ - write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -201,9 +171,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); + hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pF\n", @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. + * returns bool success. Caller should hold conn reference. */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { @@ -234,7 +204,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del(&cp->c_list); + hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -247,6 +217,36 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) return ret; } +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. 
+ */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ + unsigned int hash; + bool ret; + + hash = ip_vs_conn_hashkey_conn(cp); + + ct_write_lock(hash); + spin_lock(&cp->lock); + + if (cp->flags & IP_VS_CONN_F_HASHED) { + ret = false; + /* Decrease refcnt and unlink conn only if we are last user */ + if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + hlist_del_rcu(&cp->c_list); + cp->flags &= ~IP_VS_CONN_F_HASHED; + ret = true; + } + } else + ret = atomic_read(&cp->refcnt) ? false : true; + + spin_unlock(&cp->lock); + ct_write_unlock(hash); + + return ret; +} + /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -262,9 +262,9 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -272,14 +272,15 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); return cp; } } - ct_read_unlock(hash); + rcu_read_unlock(); return NULL; } @@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) - goto out; + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { + if (__ip_vs_conn_get(cp)) + goto out; + } continue; } @@ -365,15 +368,15 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->af, p->vaddr, &cp->vaddr) && p->cport == cp->cport && p->vport == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) - goto out; + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) + goto out; + } } cp = NULL; out: - if (cp) - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -398,23 +401,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) */ hash = ip_vs_conn_hashkey_param(p, true); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); ret = cp; break; } } - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -757,41 +761,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct) * Simply decrease the refcnt of the template, * don't restart its timer. 
*/ - atomic_dec(&ct->refcnt); + __ip_vs_conn_put(ct); return 0; } return 1; } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ + struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, + rcu_head); + + ip_vs_pe_put(cp->pe); + kfree(cp->pe_data); + kmem_cache_free(ip_vs_conn_cachep, cp); +} + static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct net *net = ip_vs_conn_net(cp); struct netns_ipvs *ipvs = net_ipvs(net); - cp->timeout = 60*HZ; - - /* - * hey, I'm using it - */ - atomic_inc(&cp->refcnt); - /* * do I control anybody? */ if (atomic_read(&cp->n_control)) goto expire_later; - /* - * unhash it if it is hashed in the conn table - */ - if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) - goto expire_later; - - /* - * refcnt==1 implies I'm the only one referrer - */ - if (likely(atomic_read(&cp->refcnt) == 1)) { + /* Unlink conn if not referenced anymore */ + if (likely(ip_vs_conn_unlink(cp))) { /* delete the timer if it is activated by other users */ del_timer(&cp->timer); @@ -810,38 +809,41 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_drop_conntrack(cp); } - ip_vs_pe_put(cp->pe); - kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); - - kmem_cache_free(ip_vs_conn_cachep, cp); return; } - /* hash it back to the table */ - ip_vs_conn_hash(cp); - expire_later: - IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt)-1, + IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", + atomic_read(&cp->refcnt), atomic_read(&cp->n_control)); + atomic_inc(&cp->refcnt); + cp->timeout = 60*HZ; + if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } - +/* Modify timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) - mod_timer(&cp->timer, jiffies); + /* Using mod_timer_pending will ensure the timer is not + * modified after the final del_timer in ip_vs_conn_expire. + */ + if (timer_pending(&cp->timer) && + time_after(cp->timer.expires, jiffies)) + mod_timer_pending(&cp->timer, jiffies); } @@ -952,14 +954,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + /* __ip_vs_conn_get() is not needed by + * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show + */ if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; } } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } return NULL; @@ -977,6 +982,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; + struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -985,19 +991,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? 
*/ - if (cp->c_list.next) - return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); + e = rcu_dereference(hlist_next_rcu(&cp->c_list)); + if (e) + return hlist_entry(e, struct ip_vs_conn, c_list); + rcu_read_unlock(); idx = l - ip_vs_conn_tab; - ct_read_unlock_bh(idx); - while (++idx < ip_vs_conn_tab_size) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } iter->l = NULL; return NULL; @@ -1009,7 +1015,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) struct hlist_head *l = iter->l; if (l) - ct_read_unlock_bh(l - ip_vs_conn_tab); + rcu_read_unlock(); } static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1188,7 +1194,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; /* * Randomly scan 1/32 of the whole table every second @@ -1199,9 +1205,9 @@ void ip_vs_random_dropentry(struct net *net) /* * Lock is actually needed in this loop. */ - ct_write_lock_bh(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1228,12 +1234,15 @@ void ip_vs_random_dropentry(struct net *net) IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(hash); + rcu_read_unlock(); } } @@ -1244,7 +1253,7 @@ void ip_vs_random_dropentry(struct net *net) static void ip_vs_conn_flush(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; struct netns_ipvs *ipvs = net_ipvs(net); flush_again: @@ -1252,19 +1261,22 @@ flush_again: /* * Lock is actually needed in this loop. 
*/ - ct_write_lock_bh(idx); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(idx); + rcu_read_unlock(); } /* the counter may be not NULL, because maybe some conn entries @@ -1331,7 +1343,7 @@ int __init ip_vs_conn_init(void) INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); + spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); } /* calculate the random value for connection hash */ @@ -1342,6 +1354,8 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { + /* Wait all ip_vs_conn_rcu_free() callbacks to complete */ + rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); -- cgit v1.2.3-70-g09d2 From 1845ed0bb29fa7864781021e0c8d06af318f358a Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:11 +0200 Subject: ipvs: reorder keys in connection structure __ip_vs_conn_in_get and ip_vs_conn_out_get are hot places. Optimize them, so that ports are matched first. By moving net and fwmark below, on 32-bit arch we can fit caddr in 32-byte cache line and all addresses in 64-byte cache line. Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++------ net/netfilter/ipvs/ip_vs_conn.c | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 5700b07b518..929e04c6b82 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -566,20 +566,19 @@ struct ip_vs_conn_param { */ struct ip_vs_conn { struct hlist_node c_list; /* hashed list heads */ -#ifdef CONFIG_NET_NS - struct net *net; /* Name space */ -#endif /* Protocol, addresses and port numbers */ - u16 af; /* address family */ __be16 cport; - __be16 vport; __be16 dport; - __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + u16 af; /* address family */ union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ volatile __u32 flags; /* status flags */ __u16 protocol; /* Which protocol (TCP/UDP) */ +#ifdef CONFIG_NET_NS + struct net *net; /* Name space */ +#endif /* counter and timer */ atomic_t refcnt; /* reference count */ @@ -593,6 +592,7 @@ struct ip_vs_conn { * state transition triggerd * synchronization */ + __u32 fwmark; /* Fire wall mark from skb */ unsigned long sync_endtime; /* jiffies + sent_retries */ /* Control members */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0cd2be01d7..416015b34c2 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -265,8 +265,8 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->cport == cp->cport && p->vport == cp->vport && + if (p->cport == cp->cport && p->vport == cp->vport && + cp->af == p->af && 
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && @@ -350,9 +350,9 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (!ip_vs_conn_net_eq(cp, p->net)) - continue; - if (p->pe_data && p->pe->ct_match) { + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { if (__ip_vs_conn_get(cp)) goto out; @@ -366,9 +366,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr is a fwmark */ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && + p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { if (__ip_vs_conn_get(cp)) goto out; } @@ -404,8 +405,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->vport == cp->cport && p->cport == cp->dport && + if (p->vport == cp->cport && p->cport == cp->dport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && -- cgit v1.2.3-70-g09d2 From 9a05475cebdd6341884b5901e53870be26e65158 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 21 Mar 2013 11:58:12 +0200 Subject: ipvs: avoid kmem_cache_zalloc in ip_vs_conn_new We have many fields to set and few to reset, use kmem_cache_alloc instead to save some cycles. 
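For illustration only, a minimal sketch of the trade-off this describes, using a made-up cache and structure rather than the IPVS ones: kmem_cache_zalloc() pays for a memset() of the whole object, so when nearly every field is assigned right after allocation it is cheaper to call kmem_cache_alloc() and clear by hand only the few fields that relied on being zeroed.

#include <linux/slab.h>

/* hypothetical connection-like object; names are placeholders */
struct foo_conn {
	unsigned long timeout;	/* always set by the caller */
	void *app_data;		/* previously relied on zalloc zeroing */
	int state;		/* previously relied on zalloc zeroing */
};

static struct foo_conn *foo_conn_new(struct kmem_cache *cachep,
				     unsigned long timeout)
{
	/* was: kmem_cache_zalloc(cachep, GFP_ATOMIC) */
	struct foo_conn *c = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (!c)
		return NULL;
	c->timeout = timeout;	/* field that is always written */
	c->app_data = NULL;	/* fields that must now be reset explicitly */
	c->state = 0;
	return c;
}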
Signed-off-by: Julian Anastasov Signed-off by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 15 +++++++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 24 +++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 929e04c6b82..43886bb282f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, dst->ip = src->ip; } +static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + dst->in6 = src->in6; + return; + } +#endif + dst->ip = src->ip; + dst->all[1] = 0; + dst->all[2] = 0; + dst->all[3] = 0; +} + static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, const union nf_inet_addr *b) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 416015b34c2..e3e2b4d3b6d 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -861,7 +861,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, p->protocol); - cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); + cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; @@ -872,13 +872,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; - ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -887,6 +887,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; + } else { + cp->pe = NULL; + cp->pe_data = NULL; + cp->pe_data_len = 0; } spin_lock_init(&cp->lock); @@ -897,18 +901,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ atomic_set(&cp->refcnt, 1); + cp->control = NULL; atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); + cp->packet_xmit = NULL; + cp->app = NULL; + cp->app_data = NULL; + /* reset struct ip_vs_seq */ + cp->in_seq.delta = 0; + cp->out_seq.delta = 0; + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); /* Bind the connection with a destination server */ + cp->dest = NULL; ip_vs_bind_dest(cp, dest); /* Set its state and timeout */ cp->state = 0; + cp->old_state = 0; cp->timeout = 3*HZ; cp->sync_endtime = jiffies & ~3UL; -- cgit v1.2.3-70-g09d2 From 6b6df46663e7aa6f7b1d82435a3488f9b81316b3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:37 +0200 Subject: ipvs: preparations for using rcu in schedulers Allow schedulers to use rcu_dereference when returning destination on lookup. The RCU read-side critical section will allow ip_vs_bind_dest to get dest refcnt as preparation for the step where destinations will be deleted without an IP_VS_WAIT_WHILE guard that holds the packet processing during update. 
Add new optional scheduler methods add_dest, del_dest and upd_dest. For now the methods are called together with update_service but update_service will be removed in a following change. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 6 ++++++ net/netfilter/ipvs/ip_vs_core.c | 6 ++++++ net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 43886bb282f..d91385c1b3a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -805,6 +805,12 @@ struct ip_vs_scheduler { int (*done_service)(struct ip_vs_service *svc); /* scheduler updating service */ int (*update_service)(struct ip_vs_service *svc); + /* dest is linked */ + int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is unlinked */ + int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is updated */ + int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); /* selecting a server from the given service */ struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4fc749c6ec2..939ad11ed53 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -301,8 +301,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ + rcu_read_lock(); dest = svc->scheduler->schedule(svc, skb); if (!dest) { + rcu_read_unlock(); IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; @@ -318,6 +320,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * when the template expires */ ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); + rcu_read_unlock(); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; @@ -446,8 +449,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } + rcu_read_lock(); dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { + rcu_read_unlock(); IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } @@ -468,6 +473,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? 
dest->port : pptr[1], flags, dest, skb->mark); + rcu_read_unlock(); if (!cp) { *ignored = -1; return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 182d95807bd..d64f800a342 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -825,6 +825,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, if (add) { list_add(&dest->n_list, &svc->destinations); svc->num_dests++; + if (svc->scheduler->add_dest) + svc->scheduler->add_dest(svc, dest); + } else { + if (svc->scheduler->upd_dest) + svc->scheduler->upd_dest(svc, dest); } /* call the update_service, because server weight may be changed */ @@ -1071,6 +1076,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, list_del(&dest->n_list); svc->num_dests--; + if (svcupd && svc->scheduler->del_dest) + svc->scheduler->del_dest(svc, dest); + /* * Call the update_service function of its scheduler */ -- cgit v1.2.3-70-g09d2 From fca9c20ae1e510525f8a2aaa25861789fd721193 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:38 +0200 Subject: ipvs: add ip_vs_dest_hold and ip_vs_dest_put ip_vs_dest_hold will be used under RCU lock while ip_vs_dest_put can be called even after dest is removed from service, as it happens for conns and some schedulers. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 10 ++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 9 ++------- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d91385c1b3a..7d3027f2c06 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1427,6 +1427,16 @@ ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __u16 protocol, __u32 fwmark, __u32 flags); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); +static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) +{ + atomic_inc(&dest->refcnt); +} + +static inline void ip_vs_dest_put(struct ip_vs_dest *dest) +{ + smp_mb__before_atomic_dec(); + atomic_dec(&dest->refcnt); +} /* * IPVS sync daemon data and function prototypes diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e3e2b4d3b6d..1b29e4a2b26 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -554,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) return; /* Increase the refcnt counter of the dest */ - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) @@ -700,12 +700,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } - /* - * Simply decrease the refcnt of the dest, because the - * dest will be either in service's destination list - * or in the trash. 
- */ - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } static int expire_quiescent_template(struct netns_ipvs *ipvs, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d64f800a342..a4f63888047 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); if (dest) - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); ip_vs_service_put(svc); return dest; } @@ -1056,7 +1056,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) ntohs(dest->port), atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ipvs->dest_trash); - atomic_inc(&dest->refcnt); + ip_vs_dest_hold(dest); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 44fd10c539a..6cc3e52f1f3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -861,7 +861,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (!dest) { dest = ip_vs_try_bind_dest(cp); if (dest) - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } } else { /* @@ -874,7 +874,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); if (dest) - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); if (!cp) { if (param->pe_data) kfree(param->pe_data); -- cgit v1.2.3-70-g09d2 From 578bc3ef1e473abb9ea99046a307fef0094b22af Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:49 +0200 Subject: ipvs: reorganize dest trash All dests will go to trash, no exceptions. But we have to use new list node t_list for this, due to RCU changes in following patches. Dests will wait there initial grace period and later all conns and schedulers to put their reference. The dests don't get reference for staying in dest trash as before. As result, we do not load ip_vs_dest_put with extra checks for last refcnt and the schedulers do not need to play games with atomic_inc_not_zero while selecting best destination. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 9 +++ net/netfilter/ipvs/ip_vs_ctl.c | 178 ++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7d3027f2c06..18aeb85079f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -749,6 +749,8 @@ struct ip_vs_dest_dst { struct rcu_head rcu_head; }; +/* In grace period after removing */ +#define IP_VS_DEST_STATE_REMOVING 0x01 /* * The real server destination forwarding entry * with ip address, port number, and so on. 
@@ -766,6 +768,7 @@ struct ip_vs_dest { atomic_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ + unsigned long state; /* state flags */ /* connection counters and thresholds */ atomic_t activeconns; /* active connections */ @@ -785,6 +788,7 @@ struct ip_vs_dest { union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + struct list_head t_list; /* in dest_trash */ struct rcu_head rcu_head; unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -912,6 +916,9 @@ struct ipvs_master_sync_state { struct netns_ipvs *ipvs; }; +/* How much time to keep dests in trash */ +#define IP_VS_DEST_TRASH_PERIOD (120 * HZ) + /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ @@ -961,6 +968,8 @@ struct netns_ipvs { /* Trash for destinations */ struct list_head dest_trash; + spinlock_t dest_trash_lock; + struct timer_list dest_trash_timer; /* expiration timer */ /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a4f63888047..80d366b7daa 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -71,7 +71,7 @@ int ip_vs_get_debug_level(void) /* Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); #ifdef CONFIG_IP_VS_IPV6 @@ -657,19 +657,25 @@ static struct ip_vs_dest * ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { - struct ip_vs_dest *dest, *nxt; + struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); + /* We can not reuse dest while in grace period + * because conns still can use dest->svc + */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -679,29 +685,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ - return dest; - } - - /* - * Try to purge the destination from trash if not referenced - */ - if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " - "from trash\n", - dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port)); - list_del(&dest->n_list); - __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); + list_del(&dest->t_list); + ip_vs_dest_hold(dest); + goto out; } } - return NULL; + dest = NULL; + +out: + spin_unlock_bh(&ipvs->dest_trash_lock); + + return dest; } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ + __ip_vs_dst_cache_reset(dest); + __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); + kfree(dest); +} /* * Clean up all the destinations in the trash @@ -710,19 +714,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables 
must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 1, so we simply release them here. + * be 0, so we simply release them here. */ static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { - list_del(&dest->n_list); - __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); + del_timer_sync(&ipvs->dest_trash_timer); + /* No need to use dest_trash_lock */ + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { + list_del(&dest->t_list); + ip_vs_dest_free(dest); } } @@ -955,11 +958,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - /* - * Get the destination from the trash - */ - list_del(&dest->n_list); - __ip_vs_update_dest(svc, dest, udest, 1); ret = 0; } else { @@ -1015,11 +1013,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ + struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, + rcu_head); + + /* End of grace period after unlinking */ + clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} + /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, + bool cleanup) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1030,34 +1038,22 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) */ ip_vs_rs_unhash(dest); - /* - * Decrease the refcnt of the dest, and free the dest - * if nobody refers to it (refcnt=0). Otherwise, throw - * the destination into the trash. - */ - if (atomic_dec_and_test(&dest->refcnt)) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", - dest->vfwmark, - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port)); - __ip_vs_dst_cache_reset(dest); - /* simply decrease svc->refcnt here, let the caller check - and release the service if nobody refers to it. 
- Only user context can release destination and service, - and only one user context can update virtual service at a - time, so the operation here is OK */ - atomic_dec(&dest->svc->refcnt); - free_percpu(dest->stats.cpustats); - kfree_rcu(dest, rcu_head); - } else { - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " - "dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port), - atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ipvs->dest_trash); - ip_vs_dest_hold(dest); + if (!cleanup) { + set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); + call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); } + + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + /* dest lives in trash without reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_dest_put(dest); } @@ -1122,13 +1118,38 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, false); LeaveFunction(2); return 0; } +static void ip_vs_dest_trash_expire(unsigned long data) +{ + struct net *net = (struct net *) data; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest *dest, *next; + + spin_lock(&ipvs->dest_trash_lock); + list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { + /* Skip if dest is in grace period */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; + if (atomic_read(&dest->refcnt) > 0) + continue; + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + ntohs(dest->port)); + list_del(&dest->t_list); + ip_vs_dest_free(dest); + } + if (!list_empty(&ipvs->dest_trash)) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + spin_unlock(&ipvs->dest_trash_lock); +} /* * Add a service into the service hash table @@ -1358,7 +1379,7 @@ out: * - The service must be unlinked, unlocked and not referenced! 
* - We are called under _bh lock */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; @@ -1394,7 +1415,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, cleanup); } /* @@ -1424,7 +1445,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { /* * Unhash it from the service table @@ -1438,7 +1459,7 @@ static void ip_vs_unlink_service(struct ip_vs_service *svc) */ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc); + __ip_vs_del_service(svc, cleanup); write_unlock_bh(&__ip_vs_svc_lock); } @@ -1450,7 +1471,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) { if (svc == NULL) return -EEXIST; - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, false); return 0; } @@ -1459,7 +1480,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup) { int idx; struct ip_vs_service *svc, *nxt; @@ -1471,7 +1492,7 @@ static int ip_vs_flush(struct net *net) list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1482,7 +1503,7 @@ static int ip_vs_flush(struct net *net) list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1498,7 +1519,7 @@ void ip_vs_service_net_cleanup(struct net *net) EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net); + ip_vs_flush(net, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1558,9 +1579,11 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(dest, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { ip_vs_forget_dev(dest, dev); } + spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); return NOTIFY_DONE; @@ -2394,7 +2417,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -3403,7 +3426,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(net, info->attrs); @@ -3800,6 +3823,9 @@ int __net_init ip_vs_control_net_init(struct net *net) INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + spin_lock_init(&ipvs->dest_trash_lock); + setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, + (unsigned 
long) net); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3829,6 +3855,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + /* Some dest can be in grace period even before cleanup, we have to + * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. + */ + rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); -- cgit v1.2.3-70-g09d2 From ed3ffc4e48e2b03d5b23988f3cfa0ad8d79e0092 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:50 +0200 Subject: ipvs: do not expect result from done_service This method releases the scheduler state, it can not fail. Such change will help to properly replace the scheduler in following patch. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 5 +---- net/netfilter/ipvs/ip_vs_dh.c | 4 +--- net/netfilter/ipvs/ip_vs_lblc.c | 4 +--- net/netfilter/ipvs/ip_vs_lblcr.c | 4 +--- net/netfilter/ipvs/ip_vs_sched.c | 13 ++++--------- net/netfilter/ipvs/ip_vs_sh.c | 4 +--- net/netfilter/ipvs/ip_vs_wrr.c | 4 +--- 8 files changed, 12 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 18aeb85079f..4990de69857 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -806,7 +806,7 @@ struct ip_vs_scheduler { /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); /* scheduling service finish */ - int (*done_service)(struct ip_vs_service *svc); + void (*done_service)(struct ip_vs_service *svc); /* scheduler updating service */ int (*update_service)(struct ip_vs_service *svc); /* dest is linked */ @@ -1392,7 +1392,7 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); +extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 80d366b7daa..d02272685dd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1334,10 +1334,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) /* * Unbind the old scheduler */ - if ((ret = ip_vs_unbind_scheduler(svc))) { - old_sched = sched; - goto out_unlock; - } + ip_vs_unbind_scheduler(svc); /* * Bind the new scheduler diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ebe80f44e71..89c27230d93 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -174,7 +174,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc) { struct ip_vs_dh_state *s = svc->sched_data; @@ -185,8 +185,6 @@ static int ip_vs_dh_done_svc(struct ip_vs_service *svc) kfree_rcu(s, rcu_head); IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c 
b/net/netfilter/ipvs/ip_vs_lblc.c index b873e177435..c7ff9789523 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -388,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; @@ -402,8 +402,6 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c22f173d528..6049b85df41 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -555,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -569,8 +569,6 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 7f11d3d37a4..1b715d0caf4 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -64,22 +64,17 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc) { struct ip_vs_scheduler *sched = svc->scheduler; if (!sched) - return 0; + return; - if (sched->done_service) { - if (sched->done_service(svc) != 0) { - pr_err("%s(): done error\n", __func__); - return -EINVAL; - } - } + if (sched->done_service) + sched->done_service(svc); svc->scheduler = NULL; - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 55e76d8db71..81c1a10c7b4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -187,7 +187,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s = svc->sched_data; @@ -198,8 +198,6 @@ static int ip_vs_sh_done_svc(struct ip_vs_service *svc) kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - - return 0; } diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 98cb05e345c..a74fd9bab95 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -129,7 +129,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { struct ip_vs_wrr_mark *mark = svc->sched_data; @@ -137,8 +137,6 @@ static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) * Release the mark variable */ kfree_rcu(mark, rcu_head); - - return 0; } -- cgit v1.2.3-70-g09d2 From ba3a3ce14ea26d602b253ef13a56d540827cd51d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:51 +0200 Subject: ipvs: convert sched_lock to spin lock As all read_locks are gone spin lock is preferred. 
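As a rough sketch of the conversion pattern, with a made-up scheduler state rather than the actual IPVS structures: once all readers run under rcu_read_lock(), only updaters still contend for the lock, so the rwlock_t and its read_lock()/write_lock() pairs collapse into a plain spinlock_t.

#include <linux/spinlock.h>
#include <linux/rculist.h>

struct foo_sched_state {
	spinlock_t lock;		/* was: rwlock_t */
	struct list_head entries;	/* readers use list_for_each_entry_rcu() */
};

static void foo_sched_add(struct foo_sched_state *s, struct list_head *e)
{
	spin_lock_bh(&s->lock);		/* was: write_lock_bh(&s->lock) */
	list_add_rcu(e, &s->entries);
	spin_unlock_bh(&s->lock);	/* was: write_unlock_bh(&s->lock) */
}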
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/ipvs/ip_vs_lblc.c | 18 +++++++++--------- net/netfilter/ipvs/ip_vs_lblcr.c | 26 +++++++++++++------------- net/netfilter/ipvs/ip_vs_rr.c | 10 +++++----- net/netfilter/ipvs/ip_vs_wrr.c | 8 ++++---- 6 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4990de69857..4a7bc63b9cb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -734,7 +734,7 @@ struct ip_vs_service { /* for scheduling */ struct ip_vs_scheduler *scheduler; /* bound scheduler object */ - rwlock_t sched_lock; /* lock sched_data */ + spinlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ /* alternate persistence engine */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d02272685dd..2bfd807bc93 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1219,7 +1219,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->net = net; INIT_LIST_HEAD(&svc->destinations); - rwlock_init(&svc->sched_lock); + spin_lock_init(&svc->sched_lock); spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index c7ff9789523..ffef8a16214 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -194,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, /* * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -242,7 +242,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) struct hlist_node *next; int i; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); tbl->dead = 1; for (i=0; ibucket[i], list) { @@ -250,7 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) atomic_dec(&tbl->entries); } } - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -274,7 +274,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + @@ -284,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -330,7 +330,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -339,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -527,10 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if 
(!tbl->dead) ip_vs_lblc_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 6049b85df41..cdfe6a95edd 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -368,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, /* * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -412,14 +412,14 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) struct ip_vs_lblcr_entry *en; struct hlist_node *next; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); tbl->dead = 1; for (i=0; ibucket[i], list) { ip_vs_lblcr_free(en); } } - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -443,7 +443,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_after(en->lastuse + sysctl_lblcr_expiration(svc), now)) @@ -452,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -498,7 +498,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; isched_lock); + spin_lock(&svc->sched_lock); hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -507,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -678,7 +678,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + sysctl_lblcr_expiration(svc))) { - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (atomic_read(&en->set.size) > 1) { struct ip_vs_dest *m; @@ -686,7 +686,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (m) ip_vs_dest_set_erase(&en->set, m); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } /* If the destination is not overloaded, use it */ @@ -701,10 +701,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* Update our cache entry */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (!tbl->dead) ip_vs_dest_set_insert(&en->set, dest, true); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); goto out; } @@ -716,10 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); if (!tbl->dead) ip_vs_lblcr_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", diff --git 
a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 3942890c6f1..aa4601ff1cc 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -39,14 +39,14 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct list_head *p; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; /* dest is already unlinked, so p->prev is not valid but * p->next is valid, use it to reach previous entry. */ if (p == &dest->n_list) svc->sched_data = p->next->prev; - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -63,7 +63,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); p = (struct list_head *) svc->sched_data; last = dest = list_entry(p, struct ip_vs_dest, n_list); @@ -85,13 +85,13 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } while (pass < 2 && p != &svc->destinations); stop: - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: svc->sched_data = &dest->n_list; - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index a74fd9bab95..b173ef907a1 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -145,7 +145,7 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, { struct ip_vs_wrr_mark *mark = svc->sched_data; - write_lock_bh(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); @@ -153,7 +153,7 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, mark->cw = mark->mw; else if (mark->di > 1) mark->cw = (mark->cw / mark->di) * mark->di + 1; - write_unlock_bh(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -170,7 +170,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); + spin_lock(&svc->sched_lock); dest = mark->cl; /* No available dests? */ if (mark->mw == 0) @@ -222,7 +222,7 @@ found: mark->cl = dest; out: - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); return dest; err_noavail: -- cgit v1.2.3-70-g09d2 From 413c2d04e9494ca38629d8a7ffeff1e4398a9fe3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:52 +0200 Subject: ipvs: convert dests to rcu In previous commits the schedulers started to access svc->destinations with _rcu list traversal primitives because the IP_VS_WAIT_WHILE macro still plays the role of grace period. Now it is time to finish the updating part, i.e. adding and deleting of dests with _rcu suffix before removing the IP_VS_WAIT_WHILE in next commit. We use the same rule for conns as for the schedulers: dests can be searched in RCU read-side critical section where ip_vs_dest_hold can be called by ip_vs_bind_dest. 
Some things are not perfect, for example, calling functions like ip_vs_lookup_dest from updating code under RCU, just because we use some function both from reader and from updater. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 8 +++++--- net/netfilter/ipvs/ip_vs_ctl.c | 32 +++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_sync.c | 11 ++++------- 4 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4a7bc63b9cb..78a6634653a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1434,7 +1434,7 @@ extern struct ip_vs_dest * ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark, __u32 flags); -extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); +extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp); static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 1b29e4a2b26..54de34077b6 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -611,10 +611,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) * Check if there is a destination for the connection, if so * bind the connection to the destination. */ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; + rcu_read_lock(); dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); @@ -624,7 +625,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->dest) { spin_unlock(&cp->lock); - return dest; + rcu_read_unlock(); + return; } /* Applications work depending on the forwarding method @@ -648,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } - return dest; + rcu_read_unlock(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2bfd807bc93..0763cc6e092 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -565,8 +565,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, return false; } -/* - * Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock. */ static struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -577,7 +577,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find the destination for the given service */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->af == svc->af) && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && (dest->port == dport)) { @@ -591,10 +591,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. + * Called under RCU lock, no refcnt is returned. 
*/ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -615,8 +616,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - if (dest) - ip_vs_dest_hold(dest); ip_vs_service_put(svc); return dest; } @@ -826,7 +825,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); if (add) { - list_add(&dest->n_list, &svc->destinations); + list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; if (svc->scheduler->add_dest) svc->scheduler->add_dest(svc, dest); @@ -933,10 +932,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Check if the dest already exists in the list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest != NULL) { IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -997,10 +996,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Lookup the destination list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1069,7 +1068,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, /* * Remove it from the d-linked destination list. */ - list_del(&dest->n_list); + list_del_rcu(&dest->n_list); svc->num_dests--; if (svcupd && svc->scheduler->del_dest) @@ -1094,7 +1093,10 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): destination not found!\n", __func__); @@ -2104,7 +2106,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) else seq_putc(seq, '\n'); - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6cc3e52f1f3..97241749216 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -858,23 +858,20 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; spin_unlock(&cp->lock); - if (!dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - ip_vs_dest_put(dest); - } + if (!dest) + ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. 
*/ + rcu_read_lock(); dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); - if (dest) - ip_vs_dest_put(dest); + rcu_read_unlock(); if (!cp) { if (param->pe_data) kfree(param->pe_data); -- cgit v1.2.3-70-g09d2 From ceec4c3816818459d90c92152e61371ff5b1d5a1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 22 Mar 2013 11:46:53 +0200 Subject: ipvs: convert services to rcu This is the final step in RCU conversion. Things that are removed: - svc->usecnt: now svc is accessed under RCU read lock - svc->inc: and some unused code - ip_vs_bind_pe and ip_vs_unbind_pe: no ability to replace PE - __ip_vs_svc_lock: replaced with RCU - IP_VS_WAIT_WHILE: now readers lookup svcs and dests under RCU and work in parallel with configuration Other changes: - before now, a RCU read-side critical section included the calling of the schedule method, now it is extended to include service lookup - ip_vs_svc_table and ip_vs_svc_fwm_table are now using hlist - svc->pe and svc->scheduler remain to the end (of grace period), the schedulers are prepared for such RCU readers even after done_service is called but they need to use synchronize_rcu because last ip_vs_scheduler_put can happen while RCU read-side critical sections use an outdated svc->scheduler pointer - as planned, update_service is removed - empty services can be freed immediately after grace period. If dests were present, the services are freed from the dest trash code Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 28 +--- net/netfilter/ipvs/ip_vs_core.c | 26 +-- net/netfilter/ipvs/ip_vs_ctl.c | 299 ++++++++++++++-------------------- net/netfilter/ipvs/ip_vs_dh.c | 1 + net/netfilter/ipvs/ip_vs_lblc.c | 1 + net/netfilter/ipvs/ip_vs_lblcr.c | 1 + net/netfilter/ipvs/ip_vs_lc.c | 1 + net/netfilter/ipvs/ip_vs_nq.c | 1 + net/netfilter/ipvs/ip_vs_pe.c | 12 -- net/netfilter/ipvs/ip_vs_proto_sctp.c | 14 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 14 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 14 +- net/netfilter/ipvs/ip_vs_rr.c | 1 + net/netfilter/ipvs/ip_vs_sched.c | 28 ++-- net/netfilter/ipvs/ip_vs_sed.c | 1 + net/netfilter/ipvs/ip_vs_sh.c | 1 + net/netfilter/ipvs/ip_vs_wlc.c | 1 + net/netfilter/ipvs/ip_vs_wrr.c | 1 + 18 files changed, 186 insertions(+), 259 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 78a6634653a..f9f5b057b48 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -359,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #define LeaveFunction(level) do {} while (0) #endif -#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); } - /* * The port number of FTP service (in network order). 
@@ -712,10 +710,9 @@ struct ip_vs_dest_user_kern { * and the forwarding entries */ struct ip_vs_service { - struct list_head s_list; /* for normal service table */ - struct list_head f_list; /* for fwmark-based service table */ + struct hlist_node s_list; /* for normal service table */ + struct hlist_node f_list; /* for fwmark-based service table */ atomic_t refcnt; /* reference counter */ - atomic_t usecnt; /* use counter */ u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ @@ -730,15 +727,16 @@ struct ip_vs_service { struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ struct ip_vs_stats stats; /* statistics for the service */ - struct ip_vs_app *inc; /* bind conns to this app inc */ /* for scheduling */ - struct ip_vs_scheduler *scheduler; /* bound scheduler object */ + struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */ spinlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ /* alternate persistence engine */ - struct ip_vs_pe *pe; + struct ip_vs_pe __rcu *pe; + + struct rcu_head rcu_head; }; /* Information for cached dst */ @@ -807,8 +805,6 @@ struct ip_vs_scheduler { int (*init_service)(struct ip_vs_service *svc); /* scheduling service finish */ void (*done_service)(struct ip_vs_service *svc); - /* scheduler updating service */ - int (*update_service)(struct ip_vs_service *svc); /* dest is linked */ int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); /* dest is unlinked */ @@ -1344,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); -void ip_vs_unbind_pe(struct ip_vs_service *svc); int register_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe); struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); @@ -1392,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc); +extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * @@ -1412,14 +1407,9 @@ extern struct ip_vs_stats ip_vs_stats; extern int sysctl_ip_vs_sync_ver; extern struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); -static inline void ip_vs_service_put(struct ip_vs_service *svc) -{ - atomic_dec(&svc->usecnt); -} - extern bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 939ad11ed53..79df3c61d4d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, { ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, vport, p); - p->pe = 
svc->pe; + p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -296,15 +296,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { + struct ip_vs_scheduler *sched; + /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ - rcu_read_lock(); - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (!dest) { - rcu_read_unlock(); IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; @@ -320,7 +321,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * when the template expires */ ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); - rcu_read_unlock(); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; @@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; + struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; @@ -449,10 +450,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - rcu_read_lock(); - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (dest == NULL) { - rcu_read_unlock(); IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } @@ -473,7 +473,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); - rcu_read_unlock(); if (!cp) { *ignored = -1; return NULL; @@ -510,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { - ip_vs_service_put(svc); return NF_DROP; } @@ -536,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; - ip_vs_service_put(svc); - /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { @@ -574,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { - ip_vs_service_put(svc); + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) return NF_ACCEPT; - } - - ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0763cc6e092..9e4074c26dc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
*/ static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); - /* sysctl variables */ #ifdef CONFIG_IP_VS_DEBUG @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* the service table hashed by */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, &svc->addr, svc->port); - list_add(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); - list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* Remove it from the svc_table table */ - list_del(&svc->s_list); + hlist_del_rcu(&svc->s_list); } else { /* Remove it from the svc_fwm_table table */ - list_del(&svc->f_list); + hlist_del_rcu(&svc->f_list); } svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); - list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) @@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(net, fwmark); - list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af && net_eq(svc->net, net)) { /* HIT */ @@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) return NULL; } +/* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, - const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; struct netns_ipvs *ipvs = net_ipvs(net); - read_lock(&__ip_vs_svc_lock); - /* * Check the table hashed by fwmark first */ @@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } out: - if (svc) - atomic_inc(&svc->usecnt); - read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", fwmark, ip_vs_proto_name(protocol), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); + kfree(svc); +} + static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { @@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) dest->svc = NULL; if (atomic_dec_and_test(&svc->refcnt)) { 
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + ip_vs_service_free(svc); } } @@ -608,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -616,7 +614,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - ip_vs_service_put(svc); return dest; } @@ -774,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_scheduler *sched; int conn_flags; /* set the weight and the flags */ @@ -816,29 +814,17 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - if (add) - ip_vs_start_estimator(svc->net, &dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - + sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { + ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (svc->scheduler->add_dest) - svc->scheduler->add_dest(svc, dest); + if (sched->add_dest) + sched->add_dest(svc, dest); } else { - if (svc->scheduler->upd_dest) - svc->scheduler->upd_dest(svc, dest); + if (sched->upd_dest) + sched->upd_dest(svc, dest); } - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); } @@ -1071,14 +1057,13 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, list_del_rcu(&dest->n_list); svc->num_dests--; - if (svcupd && svc->scheduler->del_dest) - svc->scheduler->del_dest(svc, dest); + if (svcupd) { + struct ip_vs_scheduler *sched; - /* - * Call the update_service function of its scheduler - */ - if (svcupd && svc->scheduler->update_service) - svc->scheduler->update_service(svc); + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched->del_dest) + sched->del_dest(svc, dest); + } } @@ -1103,20 +1088,11 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ENOENT; } - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. 
- */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - /* * Unlink dest from the service */ __ip_vs_unlink_dest(svc, dest, 1); - write_unlock_bh(&__ip_vs_svc_lock); - /* * Delete the destination */ @@ -1207,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1231,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, sched = NULL; /* Bind the ct retriever */ - ip_vs_bind_pe(svc, pe); + RCU_INIT_POINTER(svc->pe, pe); pe = NULL; /* Update the virtual service counters */ @@ -1247,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ipvs->num_services++; /* Hash the service into the service table */ - write_lock_bh(&__ip_vs_svc_lock); ip_vs_svc_hash(svc); - write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; /* Now there is a service - full throttle */ @@ -1259,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, out_err: if (svc != NULL) { - ip_vs_unbind_scheduler(svc); - if (svc->inc) { - local_bh_disable(); - ip_vs_app_inc_put(svc->inc); - local_bh_enable(); - } - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); - kfree(svc); + ip_vs_unbind_scheduler(svc, sched); + ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); ip_vs_pe_put(pe); @@ -1317,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #endif - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched != old_sched) { + /* Bind the new scheduler */ + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + old_sched = sched; + goto out; + } + /* Unbind the old scheduler on success */ + ip_vs_unbind_scheduler(svc, old_sched); + } /* * Set the flags and timeout value @@ -1331,47 +1302,23 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - old_sched = svc->scheduler; - if (sched != old_sched) { - /* - * Unbind the old scheduler - */ - ip_vs_unbind_scheduler(svc); + old_pe = rcu_dereference_protected(svc->pe, 1); + if (pe != old_pe) + rcu_assign_pointer(svc->pe, pe); - /* - * Bind the new scheduler - */ - if ((ret = ip_vs_bind_scheduler(svc, sched))) { - /* - * If ip_vs_bind_scheduler fails, restore the old - * scheduler. - * The main reason of failure is out of memory. - * - * The question is if the old scheduler can be - * restored all the time. TODO: if it cannot be - * restored some time, we must delete the service, - * otherwise the system may crash. 
- */ - ip_vs_bind_scheduler(svc, old_sched); - old_sched = sched; - goto out_unlock; - } - } - - old_pe = svc->pe; - if (pe != old_pe) { - ip_vs_unbind_pe(svc); - ip_vs_bind_pe(svc, pe); - } - -out_unlock: - write_unlock_bh(&__ip_vs_svc_lock); out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} /* * Delete a service from the service list @@ -1394,21 +1341,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ - old_sched = svc->scheduler; - ip_vs_unbind_scheduler(svc); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + ip_vs_unbind_scheduler(svc, old_sched); ip_vs_scheduler_put(old_sched); - /* Unbind persistence engine */ - old_pe = svc->pe; - ip_vs_unbind_pe(svc); + /* Unbind persistence engine, keep svc->pe */ + old_pe = rcu_dereference_protected(svc->pe, 1); ip_vs_pe_put(old_pe); - /* Unbind app inc */ - if (svc->inc) { - ip_vs_app_inc_put(svc->inc); - svc->inc = NULL; - } - /* * Unlink the whole destination list */ @@ -1428,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } /* decrease the module use count */ @@ -1446,21 +1385,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) */ static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { + /* Hold svc to avoid double release from dest_trash */ + atomic_inc(&svc->refcnt); /* * Unhash it from the service table */ - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. 
- */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - __ip_vs_del_service(svc, cleanup); - - write_unlock_bh(&__ip_vs_svc_lock); } /* @@ -1482,14 +1414,15 @@ static int ip_vs_del_service(struct ip_vs_service *svc) static int ip_vs_flush(struct net *net, bool cleanup) { int idx; - struct ip_vs_service *svc, *nxt; + struct ip_vs_service *svc; + struct hlist_node *n; /* * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], - s_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + s_list) { if (net_eq(svc->net, net)) ip_vs_unlink_service(svc, cleanup); } @@ -1499,8 +1432,8 @@ static int ip_vs_flush(struct net *net, bool cleanup) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, - &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net)) ip_vs_unlink_service(svc, cleanup); } @@ -1558,7 +1491,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { @@ -1567,7 +1500,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { @@ -1595,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest; - write_lock_bh(&__ip_vs_svc_lock); list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_zero_stats(&dest->stats); } ip_vs_zero_stats(&svc->stats); - write_unlock_bh(&__ip_vs_svc_lock); return 0; } @@ -1610,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } @@ -1945,7 +1876,7 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct list_head *table; + struct hlist_head *table; int bucket; }; @@ -1978,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; @@ -1989,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_rcu(svc, 
&ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; @@ -2002,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) { - read_lock_bh(&__ip_vs_svc_lock); + rcu_read_lock(); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *e; + struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; @@ -2025,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (iter->table == ip_vs_svc_table) { /* next service in table hashed by protocol */ - if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, s_list); - + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, s_list); while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], - s_list) { + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_table[iter->bucket], + s_list) { return svc; } } @@ -2042,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } /* next service in hashed by fwmark */ - if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, f_list); + e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, f_list); scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], - f_list) + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_fwm_table[iter->bucket], + f_list) return svc; } @@ -2056,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) { - read_unlock_bh(&__ip_vs_svc_lock); + rcu_read_unlock(); } @@ -2076,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -2084,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - svc->scheduler->name); + sched->name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name, + sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, svc->scheduler->name, + svc->fwmark, sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2451,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } /* Lookup the exact service by or fwmark */ + rcu_read_lock(); if (usvc.fwmark == 0) svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2507,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, 
unsigned int len) static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2530,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2549,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2578,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; + rcu_read_lock(); if (get->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); + rcu_read_unlock(); if (svc) { int count = 0; @@ -2765,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; + rcu_read_lock(); if (entry->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else svc = __ip_vs_service_find(net, AF_INET, entry->protocol, &addr, entry->port); + rcu_read_unlock(); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2927,6 +2870,7 @@ nla_put_failure: static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_service *svc) { + struct ip_vs_scheduler *sched; struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; @@ -2947,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || + sched = rcu_dereference_protected(svc->scheduler, 1); + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || (svc->pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -2998,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3009,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3069,11 +3014,13 @@ static int 
ip_vs_genl_parse_service(struct net *net, usvc->fwmark = 0; } + rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); + rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3905,8 +3852,8 @@ int __init ip_vs_control_init(void) /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); } smp_wmb(); /* Do we really need it now ? */ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 89c27230d93..ccab120df45 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -269,6 +269,7 @@ static int __init ip_vs_dh_init(void) static void __exit ip_vs_dh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index ffef8a16214..d8e5238254d 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -633,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index cdfe6a95edd..041b7cc356f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -821,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 0cabf78fbc3..5128e338a74 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); + synchronize_rcu(); } module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 51dc0cf20d9..646cfd4baa7 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) static void __exit ip_vs_nq_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); + synchronize_rcu(); } module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5d9774c4cc4..1a82b29ce8e 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -16,18 +16,6 @@ static LIST_HEAD(ip_vs_pe); /* semaphore for IPVS PEs. 
*/ static DEFINE_MUTEX(ip_vs_pe_mutex); -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ - svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ - svc->pe = NULL; -} - /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index f7190cdf023..4de5176a998 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + rcu_read_lock(); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 0bbc3feae68..7de3342e979 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ + rcu_read_lock(); if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. 
* We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1a03e2d9c6b..b62a3c0ff9b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + rcu_read_lock(); + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index aa4601ff1cc..749c98a7dd2 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -121,6 +121,7 @@ static int __init ip_vs_rr_init(void) static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); + synchronize_rcu(); } module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 1b715d0caf4..4dbcda6258b 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, { int ret; - svc->scheduler = scheduler; - if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, return ret; } } - + rcu_assign_pointer(svc->scheduler, scheduler); return 0; } @@ -64,17 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -void ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched) { - struct ip_vs_scheduler *sched = svc->scheduler; + struct ip_vs_scheduler *cur_sched; - if (!sched) + cur_sched = rcu_dereference_protected(svc->scheduler, 1); + /* This check proves that old 'sched' was installed */ + if (!cur_sched) return; if (sched->done_service) sched->done_service(svc); - - svc->scheduler = NULL; + /* svc->scheduler can not be set to NULL */ } @@ -148,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - svc->scheduler->name, svc->fwmark, - svc->fwmark, msg); + sched->name, 
svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index d01187084b7..f3205925359 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void) static void __exit ip_vs_sed_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); + synchronize_rcu(); } module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 81c1a10c7b4..0df269d7c99 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -283,6 +283,7 @@ static int __init ip_vs_sh_init(void) static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index dafae881c62..c60a81c4ce9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void) static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); + synchronize_rcu(); } module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index b173ef907a1..32c646eb874 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -261,6 +261,7 @@ static int __init ip_vs_wrr_init(void) static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); + synchronize_rcu(); } module_init(ip_vs_wrr_init); -- cgit v1.2.3-70-g09d2 From 8746ddcf12bb263ad240e095ef16531006caeb50 Mon Sep 17 00:00:00 2001 From: "holger@eitzenberger.org" Date: Sat, 23 Mar 2013 10:04:03 +0000 Subject: netfilter: xt_NFQUEUE: introduce CPU fanout Current NFQUEUE target uses a hash, computed over source and destination address (and other parameters), for steering the packet to the actual NFQUEUE. This, however forgets about the fact that the packet eventually is handled by a particular CPU on user request. If E. g. 1) IRQ affinity is used to handle packets on a particular CPU already (both single-queue or multi-queue case) and/or 2) RPS is used to steer packets to a specific softirq the target easily chooses an NFQUEUE which is not handled by a process pinned to the same CPU. The idea is therefore to use the CPU index for determining the NFQUEUE handling the packet. E. g. when having a system with 4 CPUs, 4 MQ queues and 4 NFQUEUEs it looks like this: +-----+ +-----+ +-----+ +-----+ |NFQ#0| |NFQ#1| |NFQ#2| |NFQ#3| +-----+ +-----+ +-----+ +-----+ ^ ^ ^ ^ | |NFQUEUE | | + + + + +-----+ +-----+ +-----+ +-----+ |rx-0 | |rx-1 | |rx-2 | |rx-3 | +-----+ +-----+ +-----+ +-----+ The NFQUEUEs not necessarily have to start with number 0, setups with less NFQUEUEs than packet-handling CPUs are not a problem as well. This patch extends the NFQUEUE target to accept a new NFQ_FLAG_CPU_FANOUT flag. If this is specified the target uses the CPU index for determining the NFQUEUE being used. I have to introduce rev3 for this. 
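As a hedged aside, not part of the commit itself: the per-CPU selection that revision 3 adds boils down to the mapping sketched below (names mirror the xt_NFQ_info_v3 fields in the patch); with queuenum 0 and queues_total 4, as in the diagram above, packets handled on CPU n land on NFQUEUE n.

static u32 example_cpu_fanout(u16 queuenum, u16 queues_total)
{
	/* steer by the CPU currently handling the packet, not by a hash */
	int cpu = smp_processor_id();

	return queuenum + cpu % queues_total;
}

Userspace is expected to opt into this through the matching iptables release (a cpu-fanout option next to --queue-balance); that part is outside this patch.
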
The 'flags' are folded into _v2 'bypass'. By changing the way which queue is assigned, I'm able to improve the performance if the processes reading on the NFQUEUs are pinned correctly. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_NFQUEUE.h | 9 +++++++ net/netfilter/xt_NFQUEUE.c | 41 +++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h index 9eafdbbb401..8bb5fe657d3 100644 --- a/include/uapi/linux/netfilter/xt_NFQUEUE.h +++ b/include/uapi/linux/netfilter/xt_NFQUEUE.h @@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 { __u16 bypass; }; +struct xt_NFQ_info_v3 { + __u16 queuenum; + __u16 queues_total; + __u16 flags; +#define NFQ_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFQ_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ +#define NFQ_FLAG_MASK 0x03 +}; + #endif /* _XT_NFQ_TARGET_H */ diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 817f9e9f2b1..a287ef262d4 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -108,7 +108,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) static int nfqueue_tg_check(const struct xt_tgchk_param *par) { - const struct xt_NFQ_info_v2 *info = par->targinfo; + const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; if (unlikely(!rnd_inited)) { @@ -125,11 +125,39 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) info->queues_total, maxid); return -ERANGE; } - if (par->target->revision == 2 && info->bypass > 1) + if (par->target->revision == 2 && info->flags > 1) + return -EINVAL; + if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) return -EINVAL; + return 0; } +static unsigned int +nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v3 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) { + if (info->flags & NFQ_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = info->queuenum + cpu % info->queues_total; + } else { + if (par->family == NFPROTO_IPV4) + queue = (((u64) hash_v4(skb) * info->queues_total) >> + 32) + queue; +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + else if (par->family == NFPROTO_IPV6) + queue = (((u64) hash_v6(skb) * info->queues_total) >> + 32) + queue; +#endif + } + } + return NF_QUEUE_NR(queue); +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -156,6 +184,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 3, + .family = NFPROTO_UNSPEC, + .checkentry = nfqueue_tg_check, + .target = nfqueue_tg_v3, + .targetsize = sizeof(struct xt_NFQ_info_v3), + .me = THIS_MODULE, + }, }; static int __init nfqueue_tg_init(void) -- cgit v1.2.3-70-g09d2 From 152b0f5da798c56566737f4d0bd85f69688e7d7b Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Tue, 2 Apr 2013 08:12:11 +0200 Subject: netfilter: fix struct ip6t_frag field description Signed-off-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_frag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h index b47f61b9e08..dfd8bc2268c 100644 --- 
a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h @@ -4,9 +4,9 @@ #include <linux/types.h> struct ip6t_frag { - __u32 ids[2]; /* Security Parameter Index */ + __u32 ids[2]; /* Identification range */ __u32 hdrlen; /* Header Length */ - __u8 flags; /* */ + __u8 flags; /* Flags */ __u8 invflags; /* Inverse flags */ }; -- cgit v1.2.3-70-g09d2 From 8466563e16d5198b6efeb3b51791b95b6aaacb6b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 2 Apr 2013 13:13:07 -0400 Subject: tcp: Remove dead sysctl_tcp_cookie_size declaration Remove a declaration left over from the TCPCT-ectomy. This sysctl is no longer referenced anywhere since 1a2c6181c4 ("tcp: Remove TCPCT"). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d1dcb596230..4475aaf0af5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -282,7 +282,6 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; -extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; -- cgit v1.2.3-70-g09d2 From 65b3841b9cb5fe1b239f12dbf033f9827d73d032 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Apr 2013 09:35:07 +0000 Subject: of_net.h: Provide empty functions if OF_NET is not configured of_get_mac_address() and of_get_phy_mode() are only provided if OF_NET is configured. While most callers check for the define, not all do, and those who do require #ifdef around the code. For those who don't, the missing check can result in errors such as arch/powerpc/sysdev/tsi108_dev.c:107:3: error: implicit declaration of function 'of_get_mac_address' [-Werror=implicit-function-declaration] arch/powerpc/sysdev/mv64x60_dev.c:253:2: error: implicit declaration of function 'of_get_mac_address' [-Werror=implicit-function-declaration] Provide empty functions if OF_NET is not configured. This is safe because all callers do check the return values. Cc: David Daney Signed-off-by: Guenter Roeck Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/linux/of_net.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index f4746418871..61bf53b0277 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -11,6 +11,16 @@ #include <linux/of.h> extern const int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +#else +static inline const int of_get_phy_mode(struct device_node *np) +{ + return -ENODEV; +} + +static inline const void *of_get_mac_address(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ -- cgit v1.2.3-70-g09d2 From d4299ce6b33c0afd22cf6a170cfaf89c63d1114d Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 27 Mar 2013 20:04:57 -0300 Subject: Bluetooth: Remove unneeded hci_req_cmd_status function This patch removes the hci_req_cmd_status function since it is not used anymore. The HCI request framework now considers the HCI command complete once the Command Status or Command Complete Event is received. 
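For context, a hedged sketch of the request flow this relies on, not taken from the patch: a caller batches commands on a struct hci_request and receives one completion callback once the controller answers with Command Status or Command Complete. The helpers are the ones declared in hci_core.h; the opcode here is only an example.

static int example_run_reset(struct hci_dev *hdev, hci_req_complete_t complete)
{
	struct hci_request req;

	hci_req_init(&req, hdev);
	hci_req_add(&req, HCI_OP_RESET, 0, NULL);

	/* complete() runs once the command's status/complete event arrives */
	return hci_req_run(&req, complete);
}
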
Signed-off-by: Andre Guedes Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 26 -------------------------- 2 files changed, 27 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 358a6983d3b..0e7ee892d7a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1055,7 +1055,6 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); -void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 123992984a7..a199d631e31 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3322,32 +3322,6 @@ call_complete: req_complete(hdev, status); } -void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status) -{ - hci_req_complete_t req_complete = NULL; - - BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); - - if (status) { - hci_req_cmd_complete(hdev, opcode, status); - return; - } - - /* No need to handle success status if there are more commands */ - if (!hci_req_is_complete(hdev)) - return; - - if (hdev->sent_cmd) - req_complete = bt_cb(hdev->sent_cmd)->req.complete; - - /* If the request doesn't have a complete callback or there - * are other commands/requests in the hdev queue we consider - * this request as completed. - */ - if (!req_complete || !skb_queue_empty(&hdev->cmd_q)) - hci_req_cmd_complete(hdev, opcode, status); -} - static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); -- cgit v1.2.3-70-g09d2 From b6ddb638235d90ed67af9af40e63880fd66a1939 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 2 Apr 2013 13:34:31 +0300 Subject: Bluetooth: Track received events in hdev This patch adds tracking of received HCI events to the hci_dev struct. This is necessary so that a subsequent patch can implement a function for sending a single command synchronously and returning the resulting command complete parameters in the function return value. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 3 +++ net/bluetooth/hci_event.c | 12 ++++++++++++ 3 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0e7ee892d7a..89eda2ef238 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct hci_dev { struct sk_buff_head raw_q; struct sk_buff_head cmd_q; + struct sk_buff *recv_evt; struct sk_buff *sent_cmd; struct sk_buff *reassembly[NUM_REASSEMBLY]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a199d631e31..7c323bd112f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1136,6 +1136,9 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + kfree_skb(hdev->recv_evt); + hdev->recv_evt = NULL; + /* After this point our queues are empty * and no tasks are scheduled. 
*/ hdev->close(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7e7fbca5943..ed0efb7255b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3699,6 +3699,18 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) struct hci_event_hdr *hdr = (void *) skb->data; __u8 event = hdr->evt; + hci_dev_lock(hdev); + + /* Received events are (currently) only needed when a request is + * ongoing so avoid unnecessary memory allocation. + */ + if (hdev->req_status == HCI_REQ_PEND) { + kfree_skb(hdev->recv_evt); + hdev->recv_evt = skb_clone(skb, GFP_KERNEL); + } + + hci_dev_unlock(hdev); + skb_pull(skb, HCI_EVENT_HDR_SIZE); switch (event) { -- cgit v1.2.3-70-g09d2 From 75e84b7c522c6e07964cd1f5bf28535768a1e9fa Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 2 Apr 2013 13:35:04 +0300 Subject: Bluetooth: Add __hci_cmd_sync() helper function This patch adds a helper function for sending a single HCI command waiting for its completion and then returning back the parameters in the resulting command complete event (if there was one). The implementation is very similar to that of hci_req_sync() except that instead of invocing a callback for sending HCI commands the function constructs and sends one itself and after being woken up picks the last received event from hdev->recv_evt (if it matches the right criteria) and returns it. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++ net/bluetooth/hci_core.c | 102 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89eda2ef238..755743d508a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1057,6 +1057,9 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout); + int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7c323bd112f..8b2d543fb14 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -79,6 +79,108 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } +struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) +{ + struct hci_ev_cmd_complete *ev; + struct hci_event_hdr *hdr; + struct sk_buff *skb; + + hci_dev_lock(hdev); + + skb = hdev->recv_evt; + hdev->recv_evt = NULL; + + hci_dev_unlock(hdev); + + if (!skb) + return ERR_PTR(-ENODATA); + + if (skb->len < sizeof(*hdr)) { + BT_ERR("Too short HCI event"); + goto failed; + } + + hdr = (void *) skb->data; + skb_pull(skb, HCI_EVENT_HDR_SIZE); + + if (hdr->evt != HCI_EV_CMD_COMPLETE) { + BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + goto failed; + } + + if (skb->len < sizeof(*ev)) { + BT_ERR("Too short cmd_complete event"); + goto failed; + } + + ev = (void *) skb->data; + skb_pull(skb, sizeof(*ev)); + + if (opcode == __le16_to_cpu(ev->opcode)) + return skb; + + BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, + __le16_to_cpu(ev->opcode)); + +failed: + 
kfree_skb(skb); + return ERR_PTR(-ENODATA); +} + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct hci_request req; + int err = 0; + + BT_DBG("%s", hdev->name); + + hci_req_init(&req, hdev); + + hci_req_add(&req, opcode, plen, param); + + hdev->req_status = HCI_REQ_PEND; + + err = hci_req_run(&req, hci_req_sync_complete); + if (err < 0) + return ERR_PTR(err); + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + + BT_DBG("%s end: err %d", hdev->name, err); + + if (err < 0) + return ERR_PTR(err); + + return hci_get_cmd_complete(hdev, opcode); +} +EXPORT_SYMBOL(__hci_cmd_sync); + /* Execute request and wait for completion. */ static int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, -- cgit v1.2.3-70-g09d2 From 02350a725f5bc44490c30a10e7e04a12a5ecd406 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 3 Apr 2013 21:50:29 +0300 Subject: Bluetooth: Add support for custom event terminated commands This patch adds support for having commands within HCI requests that do not result in a command complete but some other event. This is at least needed for some vendor specific commands to be issued in the hdev->setup() procecure, but might also be useful for other commands. The way that the support is implemented is by extending the skb control buffer to have a field to indicate that the command is expected to terminate with a special event. After sending the command each received event can then be compared against this field through hdev->sent_cmd. 
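As a hedged illustration of the new hook (not taken from this patch), the sketch below queues a command that terminates with a custom event; the opcode and event values are made-up placeholders for a vendor-specific command.

/* Hypothetical vendor command and event, for illustration only. */
#define EXAMPLE_OP_VENDOR_CMD	0xfc01
#define EXAMPLE_EV_VENDOR	0xff

static int example_send_vendor_cmd(struct hci_dev *hdev, void *param, u32 plen)
{
	struct hci_request req;

	hci_req_init(&req, hdev);

	/* Tell the core that this command is terminated by a custom
	 * event rather than by a Command Complete event. */
	hci_req_add_ev(&req, EXAMPLE_OP_VENDOR_CMD, plen, param,
		       EXAMPLE_EV_VENDOR);

	return hci_req_run(&req, NULL);
}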
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 1 + include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_core.c | 10 +++++++++- net/bluetooth/hci_event.c | 11 ++++++++++- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ed6e9552252..591fee7d006 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -266,6 +266,7 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); struct hci_req_ctrl { bool start; + u8 event; hci_req_complete_t complete; }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 755743d508a..b85eefb230f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1055,6 +1055,8 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, + u8 event); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b2d543fb14..7f1413cae2c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2645,7 +2645,8 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, + u8 event) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2669,9 +2670,16 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->req.start = true; + bt_cb(skb)->req.event = event; + skb_queue_tail(&req->cmd_q, skb); } +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ed0efb7255b..0a2b128d2cc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2463,7 +2463,9 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); - hci_req_cmd_complete(hdev, opcode, ev->status); + if (ev->status || + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + hci_req_cmd_complete(hdev, opcode, ev->status); if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); @@ -3713,6 +3715,13 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data; + u16 opcode = __le16_to_cpu(hdr->opcode); + + hci_req_cmd_complete(hdev, opcode, 0); + } + switch (event) { case HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); -- cgit v1.2.3-70-g09d2 From 7b1abbbed0f2a1bc19bb8c0d48a284466043092a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 3 Apr 2013 21:54:47 +0300 
Subject: Bluetooth: Add __hci_cmd_sync_ev function This patch adds a __hci_cmd_sync_ev function, analogous to __hci_cmd_sync except that it also takes an event parameter to indicate that the command completes with a special event instead of command complete. Internally this new function takes advantage of the hci_req_add_ev function introduced in the previous patch. The primary expected user of this new function are the setup routines of HCI drivers which may want to send custom commands and return only when they have completed. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_core.c | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b85eefb230f..47129b1ee20 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1061,6 +1061,8 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u8 event, u32 timeout); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7f1413cae2c..9567e32a1f0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -79,7 +79,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) } } -struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) +struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 event) { struct hci_ev_cmd_complete *ev; struct hci_event_hdr *hdr; @@ -103,6 +103,12 @@ struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode) hdr = (void *) skb->data; skb_pull(skb, HCI_EVENT_HDR_SIZE); + if (event) { + if (hdr->evt != event) + goto failed; + return skb; + } + if (hdr->evt != HCI_EV_CMD_COMPLETE) { BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); goto failed; @@ -127,8 +133,8 @@ failed: return ERR_PTR(-ENODATA); } -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout) +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u8 event, u32 timeout) { DECLARE_WAITQUEUE(wait, current); struct hci_request req; @@ -138,7 +144,7 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, hci_req_init(&req, hdev); - hci_req_add(&req, opcode, plen, param); + hci_req_add_ev(&req, opcode, plen, param, event); hdev->req_status = HCI_REQ_PEND; @@ -177,7 +183,14 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, if (err < 0) return ERR_PTR(err); - return hci_get_cmd_complete(hdev, opcode); + return hci_get_cmd_complete(hdev, opcode, event); +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); } EXPORT_SYMBOL(__hci_cmd_sync); -- cgit v1.2.3-70-g09d2 From f41c70c4d5e3f6c2a7f9e5dfc10af452591a2484 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Nov 2012 14:02:14 +0900 Subject: Bluetooth: Add driver setup stage for early init Some drivers require a special stage for their early 
init. This is always specific to the driver or transport. So call back into the driver to allow bringing up the device. The advantage of this stage is that the Bluetooth core is actually handling the HCI layer now. This means that command and event processing is available. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 47129b1ee20..395e8f6982f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -293,6 +293,7 @@ struct hci_dev { int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); int (*flush)(struct hci_dev *hdev); + int (*setup)(struct hci_dev *hdev); int (*send)(struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); int (*ioctl)(struct hci_dev *hdev, unsigned int cmd, unsigned long arg); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9567e32a1f0..0f00b8bc279 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1127,26 +1127,33 @@ int hci_dev_open(__u16 dev) goto done; } - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_RAW, &hdev->flags); - - /* Treat all non BR/EDR controllers as raw devices if - enable_hs is not set */ - if (hdev->dev_type != HCI_BREDR && !enable_hs) - set_bit(HCI_RAW, &hdev->flags); - if (hdev->open(hdev)) { ret = -EIO; goto done; } - if (!test_bit(HCI_RAW, &hdev->flags)) { - atomic_set(&hdev->cmd_cnt, 1); - set_bit(HCI_INIT, &hdev->flags); - ret = __hci_init(hdev); - clear_bit(HCI_INIT, &hdev->flags); + atomic_set(&hdev->cmd_cnt, 1); + set_bit(HCI_INIT, &hdev->flags); + + if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags)) + ret = hdev->setup(hdev); + + if (!ret) { + /* Treat all non BR/EDR controllers as raw devices if + * enable_hs is not set. + */ + if (hdev->dev_type != HCI_BREDR && !enable_hs) + set_bit(HCI_RAW, &hdev->flags); + + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + set_bit(HCI_RAW, &hdev->flags); + + if (!test_bit(HCI_RAW, &hdev->flags)) + ret = __hci_init(hdev); } + clear_bit(HCI_INIT, &hdev->flags); + if (!ret) { hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); -- cgit v1.2.3-70-g09d2 From 5afff03815e26abf34702ec10422535224cdfe38 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Nov 2012 14:02:16 +0900 Subject: Bluetooth: Remove driver init queue from core The driver init queue is no longer needed. This can all be handled inside the drivers now. So remove it.
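For example, a driver can now perform its early init from a setup() callback roughly like the sketch below (illustrative only); the vendor opcode is hypothetical and HCI_INIT_TIMEOUT is assumed to be a suitable timeout value.

/* Illustrative driver-side setup() callback replacing the old
 * driver_init queue.  The opcode 0xfc05 is a made-up vendor example. */
static int example_drv_setup(struct hci_dev *hdev)
{
	struct sk_buff *skb;

	skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Inspect the Command Complete parameters here if needed. */
	kfree_skb(skb);
	return 0;
}

/* The driver assigns hdev->setup = example_drv_setup before
 * registering the device. */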
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_core.c | 23 ----------------------- 2 files changed, 25 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 395e8f6982f..d4e13bf5ae5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -269,8 +269,6 @@ struct hci_dev { struct hci_dev_stats stat; - struct sk_buff_head driver_init; - atomic_t promisc; struct dentry *debugfs; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0f00b8bc279..9570358adb7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -316,29 +316,9 @@ static void amp_init(struct hci_request *req) static void hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; - struct hci_request init_req; - struct sk_buff *skb; BT_DBG("%s %ld", hdev->name, opt); - /* Driver initialization */ - - hci_req_init(&init_req, hdev); - - /* Special commands */ - while ((skb = skb_dequeue(&hdev->driver_init))) { - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - skb->dev = (void *) hdev; - - if (skb_queue_empty(&init_req.cmd_q)) - bt_cb(skb)->req.start = true; - - skb_queue_tail(&init_req.cmd_q, skb); - } - skb_queue_purge(&hdev->driver_init); - - hci_req_run(&init_req, NULL); - /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) hci_reset_req(req, 0); @@ -2144,7 +2124,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - skb_queue_head_init(&hdev->driver_init); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); @@ -2163,8 +2142,6 @@ EXPORT_SYMBOL(hci_alloc_dev); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) { - skb_queue_purge(&hdev->driver_init); - /* will free via device release */ put_device(&hdev->dev); } -- cgit v1.2.3-70-g09d2 From 19952cc4f8f572493293a8caed27c4be89c5fc9d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 3 Apr 2013 23:38:16 +0000 Subject: net: frag queue per hash bucket locking This patch implements per hash bucket locking for the frag queue hash. This removes two write locks, and the only remaining write lock is for protecting hash rebuild. This essentially reduces the readers-writer lock to a rebuild lock. This patch is part of "net: frag performance followup" http://thread.gmane.org/gmane.linux.network/263644 of which two patches have already been accepted: Same test setup as previous: (http://thread.gmane.org/gmane.linux.network/257155) Two 10G interfaces, on separate NUMA nodes, are under test and use Ethernet flow-control. A third interface is used for generating the DoS attack (with trafgen). Note that I have changed the frag DoS generator script to be more efficient/deadly. Before, it would only hit one RX queue; now it sends packets that cause multi-queue RX, due to "better" RX hashing. Test types summary (netperf UDP_STREAM): Test-20G64K == 2x10G with 65K fragments Test-20G3F == 2x10G with 3x fragments (3*1472 bytes) Test-20G64K+DoS == Same as 20G64K with frag DoS Test-20G3F+DoS == Same as 20G3F with frag DoS Test-20G64K+MQ == Same as 20G64K with Multi-Queue frag DoS Test-20G3F+MQ == Same as 20G3F with Multi-Queue frag DoS When I rebased this-patch(03) (on top of net-next commit a210576c) and removed the _bh spinlock, I saw a performance regression.
BUT this was caused by some unrelated change in-between. See tests below. Test (A) is what I reported before for patch-02, accepted in commit 1b5ab0de. Test (B) is a verifying retest of commit 1b5ab0de and corresponds to patch-02. Test (C) is what I reported before for this-patch. Test (D) is net-next master HEAD (commit a210576c), which reveals some (unknown) performance regression (compared against test (B)). Test (D) functions as a new base-test. Performance table summary (in Mbit/s): (#) Test-type: 20G64K 20G3F 20G64K+DoS 20G3F+DoS 20G64K+MQ 20G3F+MQ ---------- ------- ------- ---------- --------- -------- ------- (A) Patch-02 : 18848.7 13230.1 4103.04 5310.36 130.0 440.2 (B) 1b5ab0de : 18841.5 13156.8 4101.08 5314.57 129.0 424.2 (C) Patch-03v1: 18838.0 13490.5 4405.11 6814.72 196.6 461.6 (D) a210576c : 18321.5 11250.4 3635.34 5160.13 119.1 405.2 (E) with _bh : 17247.3 11492.6 3994.74 6405.29 166.7 413.6 (F) without bh: 17471.3 11298.7 3818.05 6102.11 165.7 406.3 Tests (E) and (F) are this-patch(03), with (V1) and without (V2) the _bh spinlocks. I cannot explain the slowdown for 20G64K (but it's an artificial "lab-test" so I'm not worried). But the other results do show improvements. And test (E), the "with _bh" version, is slightly better. Signed-off-by: Jesper Dangaard Brouer Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet ---- V2: - By analysis from Hannes Frederic Sowa and Eric Dumazet, we don't need the spinlock _bh versions, as Netfilter currently does a local_bh_disable() before entering inet_fragment. - Fold-in desc from cover-mail V3: - Drop the chain_len counter per hash bucket. Signed-off-by: David S. Miller --- include/net/inet_frag.h | 8 ++++++- net/ipv4/inet_fragment.c | 57 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 7cac9c5789b..6f41b45e819 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,10 +50,16 @@ struct inet_frag_queue { */ #define INETFRAGS_MAXDEPTH 128 +struct inet_frag_bucket { + struct hlist_head chain; + spinlock_t chain_lock; +}; + struct inet_frags { - struct hlist_head hash[INETFRAGS_HASHSZ]; + struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and * netfilter). Important to keep this on a seperate cacheline. + * Its primarily a rebuild protection rwlock. */ rwlock_t lock ____cacheline_aligned_in_smp; int secret_interval; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1206ca64b0e..e97d66a1fdd 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -52,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy) unsigned long now = jiffies; int i; + /* Per bucket lock NOT needed here, due to write lock protection */ write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; struct hlist_node *n; - hlist_for_each_entry_safe(q, n, &f->hash[i], list) { + hb = &f->hash[i]; + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = f->hashfn(q); if (hval != i) { + struct inet_frag_bucket *hb_dest; + hlist_del(&q->list); /* Relink to new hash chain.
*/ - hlist_add_head(&q->list, &f->hash[hval]); + hb_dest = &f->hash[hval]; + hlist_add_head(&q->list, &hb_dest->chain); } } } @@ -78,9 +85,12 @@ void inet_frags_init(struct inet_frags *f) { int i; - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb = &f->hash[i]; + spin_lock_init(&hb->chain_lock); + INIT_HLIST_HEAD(&hb->chain); + } rwlock_init(&f->lock); f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -122,9 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net); static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { - write_lock(&f->lock); + struct inet_frag_bucket *hb; + unsigned int hash; + + read_lock(&f->lock); + hash = f->hashfn(fq); + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); hlist_del(&fq->list); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + + read_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -226,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { + struct inet_frag_bucket *hb; struct inet_frag_queue *qp; #ifdef CONFIG_SMP #endif unsigned int hash; - write_lock(&f->lock); + read_lock(&f->lock); /* Protects against hash rebuild */ /* * While we stayed w/o the lock other CPU could update * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. */ hash = f->hashfn(qp_in); + hb = &f->hash[hash]; + spin_lock(&hb->chain_lock); + #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we - * promoted read lock to write lock. + * released the hash bucket lock. */ - hlist_for_each_entry(qp, &f->hash[hash], list) { + hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -258,8 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &f->hash[hash]); - write_unlock(&f->lock); + hlist_add_head(&qp->list, &hb->chain); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; } @@ -300,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; int depth = 0; - hlist_for_each_entry(q, &f->hash[hash], list) { + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); + hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); return q; } depth++; } + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); if (depth <= INETFRAGS_MAXDEPTH) -- cgit v1.2.3-70-g09d2 From f3c1a44a2208d14b061ad665d9549c9b321f38e5 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 24 Mar 2013 23:50:39 +0000 Subject: netfilter: make /proc/net/netfilter pernet This patch makes this proc dentry pernet. So far only init_net had a /proc/net/netfilter directory. 
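The pernet pattern used here, shown in a generic and purely illustrative form (all names below are placeholders, not part of this patch): each namespace gets its own proc directory from the init callback and drops it again on exit.

static int __net_init example_net_init(struct net *net)
{
	/* Create a per-namespace directory under this netns' /proc/net. */
	if (!proc_net_mkdir(net, "example", net->proc_net))
		return -ENOMEM;
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	remove_proc_entry("example", net->proc_net);
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

/* register_pernet_subsys(&example_net_ops) is called once at
 * subsystem init; the callbacks then run for every namespace. */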
Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 2 ++ include/net/netns/netfilter.h | 11 +++++++++++ net/netfilter/core.c | 33 +++++++++++++++++++++++++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 include/net/netns/netfilter.h (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index de644bcd861..b1769782748 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include @@ -94,6 +95,7 @@ struct net { struct netns_dccp dccp; #endif #ifdef CONFIG_NETFILTER + struct netns_nf nf; struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h new file mode 100644 index 00000000000..248ca1c68db --- /dev/null +++ b/include/net/netns/netfilter.h @@ -0,0 +1,11 @@ +#ifndef __NETNS_NETFILTER_H +#define __NETNS_NETFILTER_H + +#include + +struct netns_nf { +#if defined CONFIG_PROC_FS + struct proc_dir_entry *proc_netfilter; +#endif +}; +#endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50..b085184d9b4 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -281,6 +281,34 @@ struct proc_dir_entry *proc_net_netfilter; EXPORT_SYMBOL(proc_net_netfilter); #endif +static int __net_init netfilter_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", + net->proc_net); + if (net_eq(net, &init_net)) { + if (!net->nf.proc_netfilter) + return -ENOMEM; + else + proc_net_netfilter = net->nf.proc_netfilter; + } else if (!net->nf.proc_netfilter) { + pr_err("cannot create netfilter proc entry"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { + .init = netfilter_net_init, + .exit = netfilter_net_exit, +}; + void __init netfilter_init(void) { int i, h; @@ -289,11 +317,8 @@ void __init netfilter_init(void) INIT_LIST_HEAD(&nf_hooks[i][h]); } -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_net_ops) < 0) panic("cannot create netfilter proc entry"); -#endif if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); -- cgit v1.2.3-70-g09d2 From 30e0c6a6bee24db0166b7ca709277cd693e179f2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 24 Mar 2013 23:50:40 +0000 Subject: netfilter: nf_log: prepare net namespace support for loggers This patch adds netns support to nf_log and it prepares netns support for existing loggers. It is composed of four major changes. 1) nf_log_register has been split to two functions: nf_log_register and nf_log_set. The new nf_log_register is used to globally register the nf_logger and nf_log_set is used for enabling pernet support from nf_loggers. Per netns is not yet complete after this patch, it comes in separate follow up patches. 2) Add net as a parameter of nf_log_bind_pf. Per netns is not yet complete after this patch, it only allows to bind the nf_logger to the protocol family from init_net and it skips other cases. 3) Adapt all nf_log_packet callers to pass netns as parameter. 
After this patch, this function only works for init_net. 4) Make the sysctl net/netfilter/nf_log pernet. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 14 +- include/net/netns/netfilter.h | 7 + net/bridge/netfilter/ebt_log.c | 7 +- net/bridge/netfilter/ebt_nflog.c | 5 +- net/ipv4/netfilter/ip_tables.c | 3 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv6/netfilter/ip6_tables.c | 3 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 7 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 9 +- net/netfilter/nf_conntrack_proto_tcp.c | 18 +- net/netfilter/nf_conntrack_proto_udp.c | 6 +- net/netfilter/nf_conntrack_proto_udplite.c | 8 +- net/netfilter/nf_log.c | 225 ++++++++++++++++++------- net/netfilter/nfnetlink_log.c | 5 +- net/netfilter/xt_osf.c | 6 +- 16 files changed, 233 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index e991bd0a27a..31f1fb9eb78 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -49,12 +49,18 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger); -void nf_log_unbind_pf(u_int8_t pf); +void nf_log_set(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unset(struct net *net, const struct nf_logger *logger); + +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unbind_pf(struct net *net, u_int8_t pf); /* Calls the registered backend logging function */ -__printf(7, 8) -void nf_log_packet(u_int8_t pf, +__printf(8, 9) +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 248ca1c68db..88740024ccf 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -2,10 +2,17 @@ #define __NETNS_NETFILTER_H #include +#include + +struct nf_logger; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; +#endif + const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *nf_log_dir_header; #endif }; #endif diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db..08e5ea5ec4e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -176,17 +176,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? 
par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); else ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + par->out, &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc34..59ac7952010 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, - &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1b433aac266..e391db1f056 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -270,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab7..c2cd63d2d89 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } @@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 341b54ade72..8861b1ef420 100644 --- 
a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde007..b3807c5cb88 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, + NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 94b4b9853f6..a0b1c5c23d1 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -353,7 +353,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ba65b2041eb..a99b6c3427b 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, + NULL, msg); return false; } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: 
spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f..f021a2076c8 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,7 +720,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -772,7 +772,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -780,7 +780,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -793,7 +793,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -802,7 +802,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -949,7 +949,7 @@ static int tcp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; @@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct, dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: @@ -969,8 +969,8 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid RST "); + nf_log_packet(net, pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == 
TCP_RST_SET diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8..fee43228e11 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -119,7 +119,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } @@ -127,7 +127,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -143,7 +143,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index ca969f6273f..2750e6c69f8 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c81..8d331dc9b1e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,52 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } 
+void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ + const struct nf_logger *log; + + if (!net_eq(net, &init_net)) + return; + + if (pf == NFPROTO_UNSPEC) + return; + + mutex_lock(&nf_log_mutex); + log = rcu_dereference_protected(net->nf.nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (log == NULL) + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ + int i; + const struct nf_logger *log; + + if (!net_eq(net, &init_net)) + return; + + mutex_lock(&nf_log_mutex); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = rcu_dereference_protected(net->nf.nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (log == logger) + RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); + } + mutex_unlock(&nf_log_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); + /* return EEXIST if the same logger is registered, 0 on success. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,63 +90,62 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference_protected(nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); - if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); + nf_log_set(&init_net, pf, logger); return 0; } EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference_protected(nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); - if (c_logger == logger) - RCU_INIT_POINTER(nf_loggers[i], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) list_del(&logger->list[i]); - } mutex_unlock(&nf_log_mutex); - synchronize_rcu(); + nf_log_unset(&init_net, logger); } EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (!net_eq(net, &init_net)) + return 0; + + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (!net_eq(net, &init_net)) + return; + + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return; mutex_lock(&nf_log_mutex); - RCU_INIT_POINTER(nf_loggers[pf], NULL); + RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -120,8 +157,11 @@ void nf_log_packet(u_int8_t pf, char prefix[NF_LOG_PREFIXLEN]; const struct nf_logger *logger; + if (!net_eq(net, &init_net)) + return; + 
rcu_read_lock(); - logger = rcu_dereference(nf_loggers[pf]); + logger = rcu_dereference(net->nf.nf_loggers[pf]); if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +175,11 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); + mutex_lock(&nf_log_mutex); - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -145,9 +187,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct net *net = seq_file_net(s); + (*pos)++; - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -164,8 +208,9 @@ static int seq_show(struct seq_file *s, void *v) const struct nf_logger *logger; struct nf_logger *t; int ret; + struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(nf_loggers[*pos], + logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) @@ -199,7 +244,8 @@ static const struct seq_operations nflog_seq_ops = { static int nflog_open(struct inode *inode, struct file *file) { - return seq_open(file, &nflog_seq_ops); + return seq_open_net(inode, file, &nflog_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations nflog_file_ops = { @@ -207,7 +253,7 @@ static const struct file_operations nflog_file_ops = { .open = nflog_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; @@ -216,7 +262,6 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,15 +271,19 @@ static int nf_log_proc_dostring(ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; + struct net *net = current->nsproxy->net_ns; if (write) { + if (!net_eq(net, &init_net)) + return -EPERM; + if (size > sizeof(buf)) size = sizeof(buf); if (copy_from_user(buf, buffer, size)) return -EFAULT; if (!strcmp(buf, "NONE")) { - nf_log_unbind_pf(tindex); + nf_log_unbind_pf(net, tindex); return 0; } mutex_lock(&nf_log_mutex); @@ -243,11 +292,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(nf_loggers[tindex], + logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; @@ -260,49 +309,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return r; } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { int i; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { - snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].procname = - nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; - nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); - 
nf_log_sysctl_table[i].mode = 0644; - nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; - nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + struct ctl_table *table; + + table = nf_log_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_log_sysctl_table, + sizeof(nf_log_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } else { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i], + 3, "%d", i); + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = + nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = + (void *)(unsigned long) i; + } } - nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", - nf_log_sysctl_table); - if (!nf_log_dir_header) - return -ENOMEM; + net->nf.nf_log_dir_header = register_net_sysctl(net, + "net/netfilter/nf_log", + table); + if (!net->nf.nf_log_dir_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ + struct ctl_table *table; + + table = net->nf.nf_log_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_log_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); } #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { return 0; } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +} #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net) { - int i, r; + int ret = -ENOMEM; + #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, - proc_net_netfilter, &nflog_file_ops)) - return -1; + net->nf.proc_netfilter, &nflog_file_ops)) + return ret; #endif + ret = netfilter_log_sysctl_init(net); + if (ret < 0) + goto out_sysctl; - /* Errors will trigger panic, unroll on error is unnecessary. */ - r = netfilter_log_sysctl_init(); - if (r < 0) - return r; + return 0; + +out_sysctl: + /* For init_net: errors will trigger panic, don't unroll on error. 
*/ + if (!net_eq(net, &init_net)) + remove_proc_entry("nf_log", net->nf.proc_netfilter); + + return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ + netfilter_log_sysctl_exit(net); + remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { + .init = nf_log_net_init, + .exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ + int i, ret; + + ret = register_pernet_subsys(&nf_log_net_ops); + if (ret < 0) + return ret; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f248db57297..b593fd1f8ca 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -767,6 +767,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); int ret = 0; if (nfula[NFULA_CFG_CMD]) { @@ -776,9 +777,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_bind_pf(pf, &nfulnl_logger); + return nf_log_bind_pf(net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unbind_pf(pf); + nf_log_unbind_pf(net, pf); return 0; } } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bd..647d989a01e 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; + struct net *net = dev_net(p->in ? p->in : p->out); if (!info) return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->family, p->hooknum, skb, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, p->out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, + p->out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); -- cgit v1.2.3-70-g09d2 From 12202fa7573d32aa0915cde6e8fab4c86b63ca2c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 5 Apr 2013 19:40:10 +0200 Subject: netfilter: remove unneeded variable proc_net_netfilter Now that this supports net namespace for nflog and nfqueue, we can remove the global proc_net_netfilter which has no clients anymore. Based on patch from Gao feng. 
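As a rough illustration of the per-namespace pattern these netfilter patches converge on, a minimal out-of-tree sketch is shown below. It assumes CONFIG_PROC_FS is enabled, and a hypothetical "nf_example" entry plus an example_fops file_operations table defined elsewhere; it is not part of the patch, only a restatement of the idiom that makes the global proc_net_netfilter pointer unnecessary.

	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <net/net_namespace.h>

	extern const struct file_operations example_fops;	/* assumed to exist elsewhere */

	static int __net_init example_net_init(struct net *net)
	{
		/* created once per network namespace, under net->nf.proc_netfilter,
		 * instead of once globally under the old proc_net_netfilter */
		if (!proc_create("nf_example", S_IRUGO,
				 net->nf.proc_netfilter, &example_fops))
			return -ENOMEM;
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		remove_proc_entry("nf_example", net->nf.proc_netfilter);
	}

	static struct pernet_operations example_net_ops = {
		.init = example_net_init,
		.exit = example_net_exit,
	};

	static int __init example_init(void)
	{
		/* init/exit then run for init_net and for every namespace created later */
		return register_pernet_subsys(&example_net_ops);
	}

	static void __exit example_exit(void)
	{
		unregister_pernet_subsys(&example_net_ops);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");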
Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 5 ----- net/netfilter/core.c | 16 ++++------------ 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ee142846f56..0060fde3160 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #endif } -#ifdef CONFIG_PROC_FS -#include -extern struct proc_dir_entry *proc_net_netfilter; -#endif - #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b085184d9b4..7d97302f7c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -276,23 +276,15 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); -#endif - static int __net_init netfilter_net_init(struct net *net) { #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); - if (net_eq(net, &init_net)) { - if (!net->nf.proc_netfilter) - return -ENOMEM; - else - proc_net_netfilter = net->nf.proc_netfilter; - } else if (!net->nf.proc_netfilter) { - pr_err("cannot create netfilter proc entry"); + if (!net->nf.proc_netfilter) { + if (!net_eq(net, &init_net)) + pr_err("cannot create netfilter proc entry"); + return -ENOMEM; } #endif -- cgit v1.2.3-70-g09d2 From b3916db32c4a3124eee9f3742a2f4723731d7602 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 5 Apr 2013 14:57:34 +0200 Subject: Bluetooth: hidp: verify l2cap sockets We need to verify that the given sockets actually are l2cap sockets. If they aren't, we are not supposed to access bt_sk(sock) and we shouldn't start the session if the offsets turn out to be valid local BT addresses. That is, if someone passes a TCP socket to HIDCONNADD, then we access some random offset in the TCP socket (which isn't even guaranteed to be valid). Fix this by checking that the socket is an l2cap socket. 
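The check itself reduces to a pointer comparison against the L2CAP proto_ops table before any bt_sk() cast is attempted. A stripped-down sketch of that idiom follows; the function name example_is_l2cap and the l2cap_ops parameter are illustrative only (the real helper, l2cap_is_socket(), lives in l2cap_sock.c next to the static l2cap_sock_ops it compares against, as the diff below shows).

	#include <linux/types.h>
	#include <linux/net.h>

	static bool example_is_l2cap(const struct socket *sock,
				     const struct proto_ops *l2cap_ops)
	{
		/* sock->ops identifies the protocol that created the socket, so a
		 * TCP socket handed in via HIDCONNADD fails this test before any
		 * bt_sk(sock->sk) access can dereference non-Bluetooth memory. */
		return sock && sock->ops == l2cap_ops;
	}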
Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/hidp/core.c | 2 ++ net/bluetooth/l2cap_sock.c | 6 ++++++ 3 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index cdd33021f83..278830ef92c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -786,6 +786,7 @@ extern bool disable_ertm; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); +bool l2cap_is_socket(struct socket *sock); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int __l2cap_wait_ack(struct sock *sk); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 2342327f333..4ab82cb3eac 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -973,6 +973,8 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, BT_DBG(""); + if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock)) + return -EINVAL; if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) || bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) return -ENOTUNIQ; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7f9704993b7..141e7b058b7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); +bool l2cap_is_socket(struct socket *sock) +{ + return sock && sock->ops == &l2cap_sock_ops; +} +EXPORT_SYMBOL(l2cap_is_socket); + static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; -- cgit v1.2.3-70-g09d2 From 540b3a39eea0056d305f17dda47eb185c4d56ddc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 7 Apr 2013 03:44:07 +0000 Subject: net/mlx4_en: Enable DCB ETS ops only when supported by the firmware Enable the DCB ETS ops only when supported by the firmware. For older firmware/cards which don't support ETS, advertize only PFC DCB ops. Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 8 ++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 ++++++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 1 + 5 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index b799ab12a29..321553fd58d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -253,3 +253,11 @@ const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, }; + +const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getdcbx = mlx4_en_dcbnl_getdcbx, + .setdcbx = mlx4_en_dcbnl_setdcbx, +}; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 473c9d2fec1..d2a4f919bf1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2013,8 +2013,14 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); #ifdef CONFIG_MLX4_EN_DCB - if (!mlx4_is_slave(priv->mdev->dev)) - dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + if (!mlx4_is_slave(priv->mdev->dev)) { + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { + dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + } else { + en_info(priv, "enabling only PFC DCB ops\n"); + dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; + } + } #endif for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 876439748cd..ab470d991ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -109,6 +109,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", + [53] = "Port ETS Scheduler support", [55] = "Port link type sensing support", [59] = "Port management change event support", [61] = "64 byte EQE support", diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f710b7ce0dc..d4cb5d3b28a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -624,6 +624,7 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); #ifdef CONFIG_MLX4_EN_DCB extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 811f91cf5e8..1bc5a750b33 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -140,6 +140,7 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, -- cgit v1.2.3-70-g09d2 From 6b0ee8c036ecb3ac92e18e6ca0dca7bff88beaf0 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 3 Apr 2013 17:28:16 +0000 Subject: scm: Stop passing struct cred Now that uids and gids are completely encapsulated in kuid_t and kgid_t we no longer need to pass struct cred which allowed us to test both the uid and the user namespace for equality. Passing struct cred potentially allows us to pass the entire group list as BSD does but I don't believe the cost of cache line misses justifies retaining code for a future potential application. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 ++- include/net/scm.h | 16 ++++++---------- net/core/scm.c | 16 ---------------- net/unix/af_unix.c | 16 ++++++++-------- 4 files changed, 16 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0a996a3517e..a8836e8445c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -29,7 +29,8 @@ struct unix_address { struct unix_skb_parms { struct pid *pid; /* Skb credentials */ - const struct cred *cred; + kuid_t uid; + kgid_t gid; struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Security ID */ diff --git a/include/net/scm.h b/include/net/scm.h index 975cca01048..5a4c6a9eb12 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -26,7 +26,6 @@ struct scm_fp_list { struct scm_cookie { struct pid *pid; /* Skb credentials */ - const struct cred *cred; struct scm_fp_list *fp; /* Passed files */ struct scm_creds creds; /* Skb credentials */ #ifdef CONFIG_SECURITY_NETWORK @@ -51,23 +50,18 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_set_cred(struct scm_cookie *scm, - struct pid *pid, const struct cred *cred) + struct pid *pid, kuid_t uid, kgid_t gid) { scm->pid = get_pid(pid); - scm->cred = cred ? get_cred(cred) : NULL; scm->creds.pid = pid_vnr(pid); - scm->creds.uid = cred ? cred->euid : INVALID_UID; - scm->creds.gid = cred ? 
cred->egid : INVALID_GID; + scm->creds.uid = uid; + scm->creds.gid = gid; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); scm->pid = NULL; - - if (scm->cred) - put_cred(scm->cred); - scm->cred = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) @@ -81,8 +75,10 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); + scm->creds.uid = INVALID_UID; + scm->creds.gid = INVALID_GID; if (forcecreds) - scm_set_cred(scm, task_tgid(current), current_cred()); + scm_set_cred(scm, task_tgid(current), current_euid(), current_egid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; diff --git a/net/core/scm.c b/net/core/scm.c index 2dc6cdaaae8..83b2b383c86 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->creds.uid = uid; p->creds.gid = gid; - - if (!p->cred || - !uid_eq(p->cred->euid, uid) || - !gid_eq(p->cred->egid, gid)) { - struct cred *cred; - err = -ENOMEM; - cred = prepare_creds(); - if (!cred) - goto error; - - cred->uid = cred->euid = uid; - cred->gid = cred->egid = gid; - if (p->cred) - put_cred(p->cred); - p->cred = cred; - } break; } default: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 824eaf2c3af..5ca1631de7e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1991,11 +1990,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } -- cgit v1.2.3-70-g09d2 From 
79ba1d8910f517c3bd39d794ddb1a5b4c03795c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Mar 2013 14:38:07 +0100 Subject: mac80211: parse Timeout Interval Element using a struct Instead of open-coding the accesses and length check do the length check in the IE parser and assign a struct pointer for use in the remaining code. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/mlme.c | 6 +++--- net/mac80211/util.c | 6 ++++-- 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d10b5bba326..e46fea8b972 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1955,6 +1955,16 @@ enum ieee80211_timeout_interval_type { WLAN_TIMEOUT_ASSOC_COMEBACK = 3 /* 802.11w */, }; +/** + * struct ieee80211_timeout_interval_ie - Timeout Interval element + * @type: type, see &enum ieee80211_timeout_interval_type + * @value: timeout interval value + */ +struct ieee80211_timeout_interval_ie { + u8 type; + __le32 value; +} __packed; + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6ad019d3262..c783e996bcc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1180,7 +1180,7 @@ struct ieee802_11_elems { const struct ieee80211_channel_sw_ie *ch_switch_ie; const u8 *country_elem; const u8 *pwr_constr_elem; - const u8 *timeout_int; + const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; /* length of them, respectively */ @@ -1198,7 +1198,6 @@ struct ieee802_11_elems { u8 prep_len; u8 perr_len; u8 country_elem_len; - u8 timeout_int_len; /* whether a parse error occurred while retrieving these elements */ bool parse_error; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 157d951df7a..304d6cfc625 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2629,10 +2629,10 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems.timeout_int && elems.timeout_int_len == 5 && - elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { + elems.timeout_int && + elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; - tu = get_unaligned_le32(elems.timeout_int + 1); + tu = le32_to_cpu(elems.timeout_int->value); ms = tu * 1024 / 1000; sdata_info(sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4839dec5c9a..f9581c6378a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -874,8 +874,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->pwr_constr_elem = pos; break; case WLAN_EID_TIMEOUT_INTERVAL: - elems->timeout_int = pos; - elems->timeout_int_len = elen; + if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) + elems->timeout_int = (void *)pos; + else + elem_parse_failed = true; break; default: break; -- cgit v1.2.3-70-g09d2 From d87c8c6d1562f12df101c5b9857170d110e7353a Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Thu, 4 Apr 2013 06:32:12 +0000 Subject: IEEE 802.15.4: remove get_bsn from "struct ieee802154_mlme_ops" It served no purpose: we never call it from anywhere in the stack and the only driver that did implement it (fakehard) merely provided a dummy value. 
There is also considerable doubt whether it would make sense to even attempt beacon processing at this level in the Linux kernel. Signed-off-by: Werner Almesberger Signed-off-by: David S. Miller --- drivers/net/ieee802154/fakehard.c | 21 --------------------- include/net/ieee802154_netdev.h | 1 - 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 8f1c25676d4..bf0d55e2dd6 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -105,26 +105,6 @@ static u8 fake_get_dsn(const struct net_device *dev) return 0x00; /* DSN are implemented in HW, so return just 0 */ } -/** - * fake_get_bsn - Retrieve the BSN of the device. - * @dev: The network device to retrieve the BSN for. - * - * Returns the IEEE 802.15.4 BSN for the network device. - * The BSN is the sequence number which will be added to each - * beacon frame sent by the MAC. - * - * BSN means 'Beacon Sequence Number'. - * - * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 - * document. - */ -static u8 fake_get_bsn(const struct net_device *dev) -{ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - return 0x00; /* BSN are implemented in HW, so return just 0 */ -} - /** * fake_assoc_req - Make an association request to the HW. * @dev: The network device which we are associating to a network. @@ -264,7 +244,6 @@ static struct ieee802154_mlme_ops fake_mlme = { .get_pan_id = fake_get_pan_id, .get_short_addr = fake_get_short_addr, .get_dsn = fake_get_dsn, - .get_bsn = fake_get_bsn, }; static int ieee802154_fake_open(struct net_device *dev) diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index d104c882fc2..642f94c0fa2 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -110,7 +110,6 @@ struct ieee802154_mlme_ops { u16 (*get_pan_id)(const struct net_device *dev); u16 (*get_short_addr)(const struct net_device *dev); u8 (*get_dsn)(const struct net_device *dev); - u8 (*get_bsn)(const struct net_device *dev); }; /* The IEEE 802.15.4 standard defines 2 type of the devices: -- cgit v1.2.3-70-g09d2 From 56aa091d60a63fee83d2c894edb69b7c159966c7 Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Thu, 4 Apr 2013 06:32:35 +0000 Subject: ieee802154/nl-mac.c: make some MLME operations optional Check for NULL before calling the following operations from "struct ieee802154_mlme_ops": assoc_req, assoc_resp, disassoc_req, start_req, and scan_req. This fixes a current oops where those functions are called but not implemented. It also updates the documentation to clarify that they are now optional by design. If a call to an unimplemented function is attempted, the kernel returns EOPNOTSUPP via netlink. The following operations are still required: get_phy, get_pan_id, get_short_addr, and get_dsn. Note that the places where this patch changes the initialization of "ret" should not affect the rest of the code since "ret" was always set (again) before returning its value. Signed-off-by: Werner Almesberger Signed-off-by: David S. 
Miller --- Documentation/networking/ieee802154.txt | 5 +++-- include/net/ieee802154_netdev.h | 4 ++++ net/ieee802154/nl-mac.c | 25 ++++++++++++++++++++----- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index 703cf4370c7..67a9cb259d4 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -71,8 +71,9 @@ submits skb to qdisc), so if you need something from that cb later, you should store info in the skb->data on your own. To hook the MLME interface you have to populate the ml_priv field of your -net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are -required. +net_device with a pointer to struct ieee802154_mlme_ops instance. The fields +assoc_req, assoc_resp, disassoc_req, start_req, and scan_req are optional. +All other fields are required. We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 642f94c0fa2..8196d5d4035 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -85,6 +85,8 @@ struct wpan_phy; * Use wpan_wpy_put to put that reference. */ struct ieee802154_mlme_ops { + /* The following fields are optional (can be NULL). */ + int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap); @@ -101,6 +103,8 @@ struct ieee802154_mlme_ops { int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); + /* The fields below are required. */ + struct wpan_phy *(*get_phy)(const struct net_device *dev); /* diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece..b0bdd8c51e9 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, struct net_device *dev; struct ieee802154_addr addr; u8 page; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out: dev_put(dev); return ret; } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_resp) + goto out; addr.addr_type = IEEE802154_ADDR_LONG; nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); +out: dev_put(dev); return ret; } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret 
= -EINVAL; + int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->disassoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out: dev_put(dev); return ret; } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; - int ret; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->start_req) + goto out; addr.addr_type = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out: dev_put(dev); return ret; } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; - int ret; + int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->scan_req) + goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); +out: dev_put(dev); return ret; } -- cgit v1.2.3-70-g09d2 From f53adae4eae5ad9f7343ff4a0fc68b468c981138 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Apr 2013 04:01:30 +0000 Subject: net: ipv6: add tokenized interface identifier support This patch adds support for IPv6 tokenized IIDs, that allow for administrators to assign well-known host-part addresses to nodes whilst still obtaining global network prefix from Router Advertisements. It is currently in draft status. The primary target for such support is server platforms where addresses are usually manually configured, rather than using DHCPv6 or SLAAC. By using tokenised identifiers, hosts can still determine their network prefix by use of SLAAC, but more readily be automatically renumbered should their network prefix change. [...] The disadvantage with static addresses is that they are likely to require manual editing should the network prefix in use change. If instead there were a method to only manually configure the static identifier part of the IPv6 address, then the address could be automatically updated when a new prefix was introduced, as described in [RFC4192] for example. 
In such cases a DNS server might be configured with such a tokenised interface identifier of ::53, and SLAAC would use the token in constructing the interface address, using the advertised prefix. [...] http://tools.ietf.org/html/draft-chown-6man-tokenised-ipv6-identifiers-02 The implementation is partially based on top of Mark K. Thompson's proof of concept. However, it uses the Netlink interface for configuration resp. data retrival, so that it can be easily extended in future. Successfully tested by myself. Cc: Hannes Frederic Sowa Cc: YOSHIFUJI Hideaki Cc: Thomas Graf Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/if_inet6.h | 2 + include/uapi/linux/if_link.h | 1 + net/ipv6/addrconf.c | 87 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 93563221d29..f1063d62cd1 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -187,6 +187,8 @@ struct inet6_dev { struct list_head tempaddr_list; #endif + struct in6_addr token; + struct neigh_parms *nd_parms; struct inet6_dev *next; struct ipv6_devconf cnf; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c4edfe11f1f..6b35c4211f6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -201,6 +201,7 @@ enum { IFLA_INET6_MCAST, /* MC things. What of them? */ IFLA_INET6_CACHEINFO, /* time values and max reasm size */ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ __IFLA_INET6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a33b157d9cc..65d8139d60c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_regen_rndid((unsigned long) ndev); } #endif + memset(ndev->token.s6_addr, 0, sizeof(ndev->token.s6_addr)); if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; @@ -2136,8 +2137,14 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + + if (!ipv6_addr_any(&in6_dev->token)) { + read_lock_bh(&in6_dev->lock); + memcpy(addr.s6_addr + 8, + in6_dev->token.s6_addr + 8, 8); + read_unlock_bh(&in6_dev->lock); + } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); return; } @@ -4165,7 +4172,8 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ } static inline size_t inet6_if_nlmsg_size(void) @@ -4252,6 +4260,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); + if (nla == NULL) + goto nla_put_failure; + read_lock_bh(&idev->lock); + memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); + read_unlock_bh(&idev->lock); + return 0; 
nla_put_failure: @@ -4279,6 +4294,71 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ + struct in6_addr ll_addr; + struct inet6_ifaddr *ifp; + struct net_device *dev = idev->dev; + + if (token == NULL) + return -EINVAL; + if (ipv6_addr_any(token)) + return -EINVAL; + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + return -EINVAL; + if (idev->dead || !(idev->if_flags & IF_READY)) + return -EINVAL; + if (!ipv6_accept_ra(idev)) + return -EINVAL; + if (idev->cnf.rtr_solicits <= 0) + return -EINVAL; + + write_lock_bh(&idev->lock); + + BUILD_BUG_ON(sizeof(token->s6_addr) != 16); + memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + + write_unlock_bh(&idev->lock); + + ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC); + ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); + + write_lock_bh(&idev->lock); + idev->if_flags |= IF_RS_SENT; + + /* Well, that's kinda nasty ... */ + list_for_each_entry(ifp, &idev->addr_list, if_list) { + spin_lock(&ifp->lock); + if (ipv6_addr_src_scope(&ifp->addr) == + IPV6_ADDR_SCOPE_GLOBAL) { + ifp->valid_lft = 0; + ifp->prefered_lft = 0; + } + spin_unlock(&ifp->lock); + } + + write_unlock_bh(&idev->lock); + return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + int err = -EINVAL; + struct inet6_dev *idev = __in6_dev_get(dev); + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (!idev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET6_TOKEN]) + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + + return err; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { @@ -4981,6 +5061,7 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .set_link_af = inet6_set_link_af, }; /* -- cgit v1.2.3-70-g09d2 From 617fe29d45bdfffba2739e6512c83e766e6ae72c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2013 03:47:16 +0000 Subject: net: ipv6: only invalidate previously tokenized addresses Instead of invalidating all IPv6 addresses with global scope when one decides to use IPv6 tokens, we should only invalidate previous tokens and leave the rest intact until they expire eventually (or are intact forever). For doing this less greedy approach, we're adding a bool at the end of inet6_ifaddr structure instead, for two reasons: i) per-inet6_ifaddr flag space is already used up, making it wider might not be a good idea, since ii) also we do not necessarily need to export this information into user space. Suggested-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/if_inet6.h | 2 ++ net/ipv6/addrconf.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f1063d62cd1..100fb8cec17 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -71,6 +71,8 @@ struct inet6_ifaddr { struct inet6_ifaddr *ifpub; int regen_count; #endif + bool tokenized; + struct rcu_head rcu; }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 713ebe303f6..28b61e89bbb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -878,6 +878,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->tokenized = false; ifa->rt = rt; @@ -2134,6 +2135,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; + bool tokenized = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -2143,6 +2145,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) memcpy(addr.s6_addr + 8, in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); + tokenized = true; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2185,6 +2188,7 @@ ok: update_lft = create = 1; ifp->cstamp = jiffies; + ifp->tokenized = tokenized; addrconf_dad_start(ifp); } @@ -4339,8 +4343,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) /* Well, that's kinda nasty ... */ list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); - if (ipv6_addr_src_scope(&ifp->addr) == - IPV6_ADDR_SCOPE_GLOBAL) { + if (ifp->tokenized) { ifp->valid_lft = 0; ifp->prefered_lft = 0; } -- cgit v1.2.3-70-g09d2 From 211d2f97e936d206a5e45f6f64ecbc2c51a2b46c Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Mon, 8 Apr 2013 20:03:35 +0000 Subject: cls_cgroup: remove task_struct parameter from sock_update_classid() The callers always pass current to sock_update_classid(). Signed-off-by: Li Zefan Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- include/net/cls_cgroup.h | 4 ++-- net/core/scm.c | 2 +- net/core/sock.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 2581638f4a3..0fee0617fb7 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -24,7 +24,7 @@ struct cgroup_cls_state u32 classid; }; -extern void sock_update_classid(struct sock *sk, struct task_struct *task); +extern void sock_update_classid(struct sock *sk); #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) static inline u32 task_cls_classid(struct task_struct *p) @@ -61,7 +61,7 @@ static inline u32 task_cls_classid(struct task_struct *p) } #endif #else /* !CGROUP_NET_CLS_CGROUP */ -static inline void sock_update_classid(struct sock *sk, struct task_struct *task) +static inline void sock_update_classid(struct sock *sk) { } diff --git a/net/core/scm.c b/net/core/scm.c index 83b2b383c86..c77b7c3d2f9 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -291,7 +291,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) sock = sock_from_file(fp[i], &err); if (sock) { sock_update_netprioidx(sock->sk, current); - sock_update_classid(sock->sk, current); + sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 2ff5f3619a8..0e1d2fe827e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1307,11 +1307,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) } #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk) { u32 classid; - classid = task_cls_classid(task); + classid = task_cls_classid(current); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1353,7 +1353,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk, current); + sock_update_classid(sk); sock_update_netprioidx(sk, current); } -- cgit v1.2.3-70-g09d2 From 6ffd46410248ee39b46c2cdafb79791c2e618932 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Mon, 8 Apr 2013 20:03:47 +0000 Subject: netprio_cgroup: remove task_struct parameter from sock_update_netprio() The callers always pass current to sock_update_netprio(). Signed-off-by: Li Zefan Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/netprio_cgroup.h | 4 ++-- net/core/scm.c | 2 +- net/core/sock.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 1d04b6f0fbd..50ab8c26ab5 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -29,7 +29,7 @@ struct cgroup_netprio_state { struct cgroup_subsys_state css; }; -extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); +extern void sock_update_netprioidx(struct sock *sk); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -68,7 +68,7 @@ static inline u32 task_netprioidx(struct task_struct *p) return 0; } -#define sock_update_netprioidx(sk, task) +#define sock_update_netprioidx(sk) #endif /* CONFIG_NETPRIO_CGROUP */ diff --git a/net/core/scm.c b/net/core/scm.c index c77b7c3d2f9..03795d0147f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -290,7 +290,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. 
*/ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk, current); + sock_update_netprioidx(sock->sk); sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); diff --git a/net/core/sock.c b/net/core/sock.c index 0e1d2fe827e..d4f4cea726e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1319,12 +1319,12 @@ EXPORT_SYMBOL(sock_update_classid); #endif #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1354,7 +1354,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk, current); + sock_update_netprioidx(sk); } return sk; -- cgit v1.2.3-70-g09d2 From 1b8664341100716202c29d67f24d67094a82971e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2013 05:54:01 +0000 Subject: net: sctp: introduce uapi header for sctp This patch introduces an UAPI header for the SCTP protocol, so that we can facilitate the maintenance and development of user land applications or libraries, in particular in terms of header synchronization. To not break compatibility, some fragments from lksctp-tools' netinet/sctp.h have been carefully included, while taking care that neither kernel nor user land breaks, so both compile fine with this change (for lksctp-tools I tested with the old netinet/sctp.h header and with a newly adapted one that includes the uapi sctp header). lksctp-tools smoke test run through successfully as well in both cases. Suggested-by: Neil Horman Cc: Neil Horman Cc: Vlad Yasevich Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- fs/dlm/lowcomms.c | 2 +- include/linux/sctp.h | 6 +- include/net/sctp/constants.h | 1 - include/net/sctp/user.h | 782 --------------------------------------- include/uapi/linux/Kbuild | 1 + include/uapi/linux/sctp.h | 846 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 850 insertions(+), 788 deletions(-) delete mode 100644 include/net/sctp/user.h create mode 100644 include/uapi/linux/sctp.h (limited to 'include') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 4f5ad246582..d0ccd2fd79e 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -52,8 +52,8 @@ #include #include #include +#include #include -#include #include #include "dlm_internal.h" diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c11a28706fa..3bfe8d6ee24 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -53,7 +53,9 @@ #include /* We need in_addr. */ #include /* We need in6_addr. */ +#include +#include /* Section 3.1. SCTP Common Header Format */ typedef struct sctphdr { @@ -63,14 +65,10 @@ typedef struct sctphdr { __le32 checksum; } __packed sctp_sctphdr_t; -#ifdef __KERNEL__ -#include - static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { return (struct sctphdr *)skb_transport_header(skb); } -#endif /* Section 3.2. Chunk Field Descriptions. */ typedef struct sctp_chunkhdr { diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index a7dd5c50df7..ca50e0751e4 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -49,7 +49,6 @@ #include #include /* For ipv6hdr. 
*/ -#include #include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h deleted file mode 100644 index 9a0ae091366..00000000000 --- a/include/net/sctp/user.h +++ /dev/null @@ -1,782 +0,0 @@ -/* SCTP kernel implementation - * (C) Copyright IBM Corp. 2001, 2004 - * Copyright (c) 1999-2000 Cisco, Inc. - * Copyright (c) 1999-2001 Motorola, Inc. - * Copyright (c) 2002 Intel Corp. - * - * This file is part of the SCTP kernel implementation - * - * This header represents the structures and constants needed to support - * the SCTP Extension to the Sockets API. - * - * This SCTP implementation is free software; - * you can redistribute it and/or modify it under the terms of - * the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This SCTP implementation is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * ************************ - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * - * Please send any bug reports or fixes you make to the - * email address(es): - * lksctp developers - * - * Or submit a bug report through the following website: - * http://www.sf.net/projects/lksctp - * - * Written or modified by: - * La Monte H.P. Yarroll - * R. Stewart - * K. Morneau - * Q. Xie - * Karl Knutson - * Jon Grimm - * Daisy Chang - * Ryan Layer - * Ardelle Fan - * Sridhar Samudrala - * - * Any bugs reported given to us we will try to fix... any fixes shared will - * be incorporated into the next SCTP release. - */ - -#ifndef __net_sctp_user_h__ -#define __net_sctp_user_h__ - -#include -#include - -typedef __s32 sctp_assoc_t; - -/* The following symbols come from the Sockets API Extensions for - * SCTP . - */ -#define SCTP_RTOINFO 0 -#define SCTP_ASSOCINFO 1 -#define SCTP_INITMSG 2 -#define SCTP_NODELAY 3 /* Get/set nodelay option. */ -#define SCTP_AUTOCLOSE 4 -#define SCTP_SET_PEER_PRIMARY_ADDR 5 -#define SCTP_PRIMARY_ADDR 6 -#define SCTP_ADAPTATION_LAYER 7 -#define SCTP_DISABLE_FRAGMENTS 8 -#define SCTP_PEER_ADDR_PARAMS 9 -#define SCTP_DEFAULT_SEND_PARAM 10 -#define SCTP_EVENTS 11 -#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ -#define SCTP_MAXSEG 13 /* Get/set maximum fragment. 
*/ -#define SCTP_STATUS 14 -#define SCTP_GET_PEER_ADDR_INFO 15 -#define SCTP_DELAYED_ACK_TIME 16 -#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME -#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME -#define SCTP_CONTEXT 17 -#define SCTP_FRAGMENT_INTERLEAVE 18 -#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ -#define SCTP_MAX_BURST 20 /* Set/Get max burst */ -#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ -#define SCTP_HMAC_IDENT 22 -#define SCTP_AUTH_KEY 23 -#define SCTP_AUTH_ACTIVE_KEY 24 -#define SCTP_AUTH_DELETE_KEY 25 -#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ -#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ -#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ -#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ -#define SCTP_AUTO_ASCONF 30 -#define SCTP_PEER_ADDR_THLDS 31 - -/* Internal Socket Options. Some of the sctp library functions are - * implemented using these socket options. - */ -#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ -#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ -#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ -/* Options 104-106 are deprecated and removed. Do not use this space */ -#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ -#define SCTP_GET_PEER_ADDRS 108 /* Get all peer address. */ -#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local address. */ -#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ -#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ -#define SCTP_GET_ASSOC_STATS 112 /* Read only */ - -/* - * 5.2.1 SCTP Initiation Structure (SCTP_INIT) - * - * This cmsghdr structure provides information for initializing new - * SCTP associations with sendmsg(). The SCTP_INITMSG socket option - * uses this same data structure. This structure is not used for - * recvmsg(). - * - * cmsg_level cmsg_type cmsg_data[] - * ------------ ------------ ---------------------- - * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg - * - */ -struct sctp_initmsg { - __u16 sinit_num_ostreams; - __u16 sinit_max_instreams; - __u16 sinit_max_attempts; - __u16 sinit_max_init_timeo; -}; - -/* - * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * This cmsghdr structure specifies SCTP options for sendmsg() and - * describes SCTP header information about a received message through - * recvmsg(). - * - * cmsg_level cmsg_type cmsg_data[] - * ------------ ------------ ---------------------- - * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo - * - */ -struct sctp_sndrcvinfo { - __u16 sinfo_stream; - __u16 sinfo_ssn; - __u16 sinfo_flags; - __u32 sinfo_ppid; - __u32 sinfo_context; - __u32 sinfo_timetolive; - __u32 sinfo_tsn; - __u32 sinfo_cumtsn; - sctp_assoc_t sinfo_assoc_id; -}; - -/* - * sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - */ - -enum sctp_sinfo_flags { - SCTP_UNORDERED = 1, /* Send/receive message unordered. */ - SCTP_ADDR_OVER = 2, /* Override the primary destination. */ - SCTP_ABORT=4, /* Send an ABORT message to the peer. */ - SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ - SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ -}; - - -/* These are cmsg_types. 
*/ -typedef enum sctp_cmsg_type { - SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ - SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ -} sctp_cmsg_t; - - -/* - * 5.3.1.1 SCTP_ASSOC_CHANGE - * - * Communication notifications inform the ULP that an SCTP association - * has either begun or ended. The identifier for a new association is - * provided by this notificaion. The notification information has the - * following format: - * - */ -struct sctp_assoc_change { - __u16 sac_type; - __u16 sac_flags; - __u32 sac_length; - __u16 sac_state; - __u16 sac_error; - __u16 sac_outbound_streams; - __u16 sac_inbound_streams; - sctp_assoc_t sac_assoc_id; - __u8 sac_info[0]; -}; - -/* - * sac_state: 32 bits (signed integer) - * - * This field holds one of a number of values that communicate the - * event that happened to the association. They include: - * - * Note: The following state names deviate from the API draft as - * the names clash too easily with other kernel symbols. - */ -enum sctp_sac_state { - SCTP_COMM_UP, - SCTP_COMM_LOST, - SCTP_RESTART, - SCTP_SHUTDOWN_COMP, - SCTP_CANT_STR_ASSOC, -}; - -/* - * 5.3.1.2 SCTP_PEER_ADDR_CHANGE - * - * When a destination address on a multi-homed peer encounters a change - * an interface details event is sent. The information has the - * following structure: - */ -struct sctp_paddr_change { - __u16 spc_type; - __u16 spc_flags; - __u32 spc_length; - struct sockaddr_storage spc_aaddr; - int spc_state; - int spc_error; - sctp_assoc_t spc_assoc_id; -} __attribute__((packed, aligned(4))); - -/* - * spc_state: 32 bits (signed integer) - * - * This field holds one of a number of values that communicate the - * event that happened to the address. They include: - */ -enum sctp_spc_state { - SCTP_ADDR_AVAILABLE, - SCTP_ADDR_UNREACHABLE, - SCTP_ADDR_REMOVED, - SCTP_ADDR_ADDED, - SCTP_ADDR_MADE_PRIM, - SCTP_ADDR_CONFIRMED, -}; - - -/* - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * A remote peer may send an Operational Error message to its peer. - * This message indicates a variety of error conditions on an - * association. The entire error TLV as it appears on the wire is - * included in a SCTP_REMOTE_ERROR event. Please refer to the SCTP - * specification [SCTP] and any extensions for a list of possible - * error formats. SCTP error TLVs have the format: - */ -struct sctp_remote_error { - __u16 sre_type; - __u16 sre_flags; - __u32 sre_length; - __u16 sre_error; - sctp_assoc_t sre_assoc_id; - __u8 sre_data[0]; -}; - - -/* - * 5.3.1.4 SCTP_SEND_FAILED - * - * If SCTP cannot deliver a message it may return the message as a - * notification. - */ -struct sctp_send_failed { - __u16 ssf_type; - __u16 ssf_flags; - __u32 ssf_length; - __u32 ssf_error; - struct sctp_sndrcvinfo ssf_info; - sctp_assoc_t ssf_assoc_id; - __u8 ssf_data[0]; -}; - -/* - * ssf_flags: 16 bits (unsigned integer) - * - * The flag value will take one of the following values - * - * SCTP_DATA_UNSENT - Indicates that the data was never put on - * the wire. - * - * SCTP_DATA_SENT - Indicates that the data was put on the wire. - * Note that this does not necessarily mean that the - * data was (or was not) successfully delivered. - */ -enum sctp_ssf_flags { - SCTP_DATA_UNSENT, - SCTP_DATA_SENT, -}; - -/* - * 5.3.1.5 SCTP_SHUTDOWN_EVENT - * - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. 
- */ -struct sctp_shutdown_event { - __u16 sse_type; - __u16 sse_flags; - __u32 sse_length; - sctp_assoc_t sse_assoc_id; -}; - -/* - * 5.3.1.6 SCTP_ADAPTATION_INDICATION - * - * When a peer sends a Adaptation Layer Indication parameter , SCTP - * delivers this notification to inform the application - * that of the peers requested adaptation layer. - */ -struct sctp_adaptation_event { - __u16 sai_type; - __u16 sai_flags; - __u32 sai_length; - __u32 sai_adaptation_ind; - sctp_assoc_t sai_assoc_id; -}; - -/* - * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT - * - * When a receiver is engaged in a partial delivery of a - * message this notification will be used to indicate - * various events. - */ -struct sctp_pdapi_event { - __u16 pdapi_type; - __u16 pdapi_flags; - __u32 pdapi_length; - __u32 pdapi_indication; - sctp_assoc_t pdapi_assoc_id; -}; - -enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; - -struct sctp_authkey_event { - __u16 auth_type; - __u16 auth_flags; - __u32 auth_length; - __u16 auth_keynumber; - __u16 auth_altkeynumber; - __u32 auth_indication; - sctp_assoc_t auth_assoc_id; -}; - -enum { SCTP_AUTH_NEWKEY = 0, }; - -/* - * 6.1.9. SCTP_SENDER_DRY_EVENT - * - * When the SCTP stack has no more user data to send or retransmit, this - * notification is given to the user. Also, at the time when a user app - * subscribes to this event, if there is no data to be sent or - * retransmit, the stack will immediately send up this notification. - */ -struct sctp_sender_dry_event { - __u16 sender_dry_type; - __u16 sender_dry_flags; - __u32 sender_dry_length; - sctp_assoc_t sender_dry_assoc_id; -}; - -/* - * Described in Section 7.3 - * Ancillary Data and Notification Interest Options - */ -struct sctp_event_subscribe { - __u8 sctp_data_io_event; - __u8 sctp_association_event; - __u8 sctp_address_event; - __u8 sctp_send_failure_event; - __u8 sctp_peer_error_event; - __u8 sctp_shutdown_event; - __u8 sctp_partial_delivery_event; - __u8 sctp_adaptation_layer_event; - __u8 sctp_authentication_event; - __u8 sctp_sender_dry_event; -}; - -/* - * 5.3.1 SCTP Notification Structure - * - * The notification structure is defined as the union of all - * notification types. - * - */ -union sctp_notification { - struct { - __u16 sn_type; /* Notification type. */ - __u16 sn_flags; - __u32 sn_length; - } sn_header; - struct sctp_assoc_change sn_assoc_change; - struct sctp_paddr_change sn_paddr_change; - struct sctp_remote_error sn_remote_error; - struct sctp_send_failed sn_send_failed; - struct sctp_shutdown_event sn_shutdown_event; - struct sctp_adaptation_event sn_adaptation_event; - struct sctp_pdapi_event sn_pdapi_event; - struct sctp_authkey_event sn_authkey_event; - struct sctp_sender_dry_event sn_sender_dry_event; -}; - -/* Section 5.3.1 - * All standard values for sn_type flags are greater than 2^15. - * Values from 2^15 and down are reserved. - */ - -enum sctp_sn_type { - SCTP_SN_TYPE_BASE = (1<<15), - SCTP_ASSOC_CHANGE, - SCTP_PEER_ADDR_CHANGE, - SCTP_SEND_FAILED, - SCTP_REMOTE_ERROR, - SCTP_SHUTDOWN_EVENT, - SCTP_PARTIAL_DELIVERY_EVENT, - SCTP_ADAPTATION_INDICATION, - SCTP_AUTHENTICATION_EVENT, -#define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT - SCTP_SENDER_DRY_EVENT, -}; - -/* Notification error codes used to fill up the error fields in some - * notifications. - * SCTP_PEER_ADDRESS_CHAGE : spc_error - * SCTP_ASSOC_CHANGE : sac_error - * These names should be potentially included in the draft 04 of the SCTP - * sockets API specification. 
- */ -typedef enum sctp_sn_error { - SCTP_FAILED_THRESHOLD, - SCTP_RECEIVED_SACK, - SCTP_HEARTBEAT_SUCCESS, - SCTP_RESPONSE_TO_USER_REQ, - SCTP_INTERNAL_ERROR, - SCTP_SHUTDOWN_GUARD_EXPIRES, - SCTP_PEER_FAULTY, -} sctp_sn_error_t; - -/* - * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO) - * - * The protocol parameters used to initialize and bound retransmission - * timeout (RTO) are tunable. See [SCTP] for more information on how - * these parameters are used in RTO calculation. - */ -struct sctp_rtoinfo { - sctp_assoc_t srto_assoc_id; - __u32 srto_initial; - __u32 srto_max; - __u32 srto_min; -}; - -/* - * 7.1.2 Association Parameters (SCTP_ASSOCINFO) - * - * This option is used to both examine and set various association and - * endpoint parameters. - */ -struct sctp_assocparams { - sctp_assoc_t sasoc_assoc_id; - __u16 sasoc_asocmaxrxt; - __u16 sasoc_number_peer_destinations; - __u32 sasoc_peer_rwnd; - __u32 sasoc_local_rwnd; - __u32 sasoc_cookie_life; -}; - -/* - * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) - * - * Requests that the peer mark the enclosed address as the association - * primary. The enclosed address must be one of the association's - * locally bound addresses. The following structure is used to make a - * set primary request: - */ -struct sctp_setpeerprim { - sctp_assoc_t sspp_assoc_id; - struct sockaddr_storage sspp_addr; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) - * - * Requests that the local SCTP stack use the enclosed peer address as - * the association primary. The enclosed address must be one of the - * association peer's addresses. The following structure is used to - * make a set peer primary request: - */ -struct sctp_prim { - sctp_assoc_t ssp_assoc_id; - struct sockaddr_storage ssp_addr; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) - * - * Requests that the local endpoint set the specified Adaptation Layer - * Indication parameter for all future INIT and INIT-ACK exchanges. - */ -struct sctp_setadaptation { - __u32 ssb_adaptation_ind; -}; - -/* - * 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) - * - * Applications can enable or disable heartbeats for any peer address - * of an association, modify an address's heartbeat interval, force a - * heartbeat to be sent immediately, and adjust the address's maximum - * number of retransmissions sent before an address is considered - * unreachable. The following structure is used to access and modify an - * address's parameters: - */ -enum sctp_spp_flags { - SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/ - SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/ - SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, - SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/ - SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/ - SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/ - SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, - SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/ - SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/ - SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, - SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */ -}; - -struct sctp_paddrparams { - sctp_assoc_t spp_assoc_id; - struct sockaddr_storage spp_address; - __u32 spp_hbinterval; - __u16 spp_pathmaxrxt; - __u32 spp_pathmtu; - __u32 spp_sackdelay; - __u32 spp_flags; -} __attribute__((packed, aligned(4))); - -/* - * 7.1.18. 
Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) - * - * This set option adds a chunk type that the user is requesting to be - * received only in an authenticated way. Changes to the list of chunks - * will only effect future associations on the socket. - */ -struct sctp_authchunk { - __u8 sauth_chunk; -}; - -/* - * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) - * - * This option gets or sets the list of HMAC algorithms that the local - * endpoint requires the peer to use. -*/ -struct sctp_hmacalgo { - __u32 shmac_num_idents; - __u16 shmac_idents[]; -}; - -/* - * 7.1.20. Set a shared key (SCTP_AUTH_KEY) - * - * This option will set a shared secret key which is used to build an - * association shared key. - */ -struct sctp_authkey { - sctp_assoc_t sca_assoc_id; - __u16 sca_keynumber; - __u16 sca_keylength; - __u8 sca_key[]; -}; - -/* - * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) - * - * This option will get or set the active shared key to be used to build - * the association shared key. - */ - -struct sctp_authkeyid { - sctp_assoc_t scact_assoc_id; - __u16 scact_keynumber; -}; - - -/* - * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) - * - * This option will effect the way delayed acks are performed. This - * option allows you to get or set the delayed ack time, in - * milliseconds. It also allows changing the delayed ack frequency. - * Changing the frequency to 1 disables the delayed sack algorithm. If - * the assoc_id is 0, then this sets or gets the endpoints default - * values. If the assoc_id field is non-zero, then the set or get - * effects the specified association for the one to many model (the - * assoc_id field is ignored by the one to one model). Note that if - * sack_delay or sack_freq are 0 when setting this option, then the - * current values will remain unchanged. - */ -struct sctp_sack_info { - sctp_assoc_t sack_assoc_id; - uint32_t sack_delay; - uint32_t sack_freq; -}; - -struct sctp_assoc_value { - sctp_assoc_t assoc_id; - uint32_t assoc_value; -}; - -/* - * 7.2.2 Peer Address Information - * - * Applications can retrieve information about a specific peer address - * of an association, including its reachability state, congestion - * window, and retransmission timer values. This information is - * read-only. The following structure is used to access this - * information: - */ -struct sctp_paddrinfo { - sctp_assoc_t spinfo_assoc_id; - struct sockaddr_storage spinfo_address; - __s32 spinfo_state; - __u32 spinfo_cwnd; - __u32 spinfo_srtt; - __u32 spinfo_rto; - __u32 spinfo_mtu; -} __attribute__((packed, aligned(4))); - -/* Peer addresses's state. */ -/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x] - * calls. - * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters. - * Not yet confirmed by a heartbeat and not available for data - * transfers. - * ACTIVE : Peer address confirmed, active and available for data transfers. - * INACTIVE: Peer address inactive and not available for data transfers. - */ -enum sctp_spinfo_state { - SCTP_INACTIVE, - SCTP_PF, - SCTP_ACTIVE, - SCTP_UNCONFIRMED, - SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ -}; - -/* - * 7.2.1 Association Status (SCTP_STATUS) - * - * Applications can retrieve current status information about an - * association, including association state, peer receiver window size, - * number of unacked data chunks, and number of data chunks pending - * receipt. This information is read-only. 
The following structure is - * used to access this information: - */ -struct sctp_status { - sctp_assoc_t sstat_assoc_id; - __s32 sstat_state; - __u32 sstat_rwnd; - __u16 sstat_unackdata; - __u16 sstat_penddata; - __u16 sstat_instrms; - __u16 sstat_outstrms; - __u32 sstat_fragmentation_point; - struct sctp_paddrinfo sstat_primary; -}; - -/* - * 7.2.3. Get the list of chunks the peer requires to be authenticated - * (SCTP_PEER_AUTH_CHUNKS) - * - * This option gets a list of chunks for a specified association that - * the peer requires to be received authenticated only. - */ -struct sctp_authchunks { - sctp_assoc_t gauth_assoc_id; - __u32 gauth_number_of_chunks; - uint8_t gauth_chunks[]; -}; - -/* - * 8.2.6. Get the Current Identifiers of Associations - * (SCTP_GET_ASSOC_ID_LIST) - * - * This option gets the current list of SCTP association identifiers of - * the SCTP associations handled by a one-to-many style socket. - */ -struct sctp_assoc_ids { - __u32 gaids_number_of_ids; - sctp_assoc_t gaids_assoc_id[]; -}; - -/* - * 8.3, 8.5 get all peer/local addresses in an association. - * This parameter struct is used by SCTP_GET_PEER_ADDRS and - * SCTP_GET_LOCAL_ADDRS socket options used internally to implement - * sctp_getpaddrs() and sctp_getladdrs() API. - */ -struct sctp_getaddrs_old { - sctp_assoc_t assoc_id; - int addr_num; - struct sockaddr __user *addrs; -}; -struct sctp_getaddrs { - sctp_assoc_t assoc_id; /*input*/ - __u32 addr_num; /*output*/ - __u8 addrs[0]; /*output, variable size*/ -}; - -/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves - * association stats. All stats are counts except sas_maxrto and - * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since - * the last call. Will return 0 when RTO was not update since last call - */ -struct sctp_assoc_stats { - sctp_assoc_t sas_assoc_id; /* Input */ - /* Transport of observed max RTO */ - struct sockaddr_storage sas_obs_rto_ipaddr; - __u64 sas_maxrto; /* Maximum Observed RTO for period */ - __u64 sas_isacks; /* SACKs received */ - __u64 sas_osacks; /* SACKs sent */ - __u64 sas_opackets; /* Packets sent */ - __u64 sas_ipackets; /* Packets received */ - __u64 sas_rtxchunks; /* Retransmitted Chunks */ - __u64 sas_outofseqtsns;/* TSN received > next expected */ - __u64 sas_idupchunks; /* Dups received (ordered+unordered) */ - __u64 sas_gapcnt; /* Gap Acknowledgements Received */ - __u64 sas_ouodchunks; /* Unordered data chunks sent */ - __u64 sas_iuodchunks; /* Unordered data chunks received */ - __u64 sas_oodchunks; /* Ordered data chunks sent */ - __u64 sas_iodchunks; /* Ordered data chunks received */ - __u64 sas_octrlchunks; /* Control chunks sent */ - __u64 sas_ictrlchunks; /* Control chunks received */ -}; - -/* These are bit fields for msghdr->msg_flags. See section 5.1. */ -/* On user space Linux, these live in as an enum. */ -enum sctp_msg_flags { - MSG_NOTIFICATION = 0x8000, -#define MSG_NOTIFICATION MSG_NOTIFICATION -}; - -/* - * 8.1 sctp_bindx() - * - * The flags parameter is formed from the bitwise OR of zero or more of the - * following currently defined flags: - */ -#define SCTP_BINDX_ADD_ADDR 0x01 -#define SCTP_BINDX_REM_ADDR 0x02 - -/* This is the structure that is passed as an argument(optval) to - * getsockopt(SCTP_SOCKOPT_PEELOFF). 
- */ -typedef struct { - sctp_assoc_t associd; - int sd; -} sctp_peeloff_arg_t; - -/* - * Peer Address Thresholds socket option - */ -struct sctp_paddrthlds { - sctp_assoc_t spt_assoc_id; - struct sockaddr_storage spt_address; - __u16 spt_pathmaxrxt; - __u16 spt_pathpfthld; -}; -#endif /* __net_sctp_user_h__ */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5c8a1d25e21..7df19052533 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -331,6 +331,7 @@ header-y += rtnetlink.h header-y += scc.h header-y += sched.h header-y += screen_info.h +header-y += sctp.h header-y += sdla.h header-y += seccomp.h header-y += securebits.h diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h new file mode 100644 index 00000000000..66b466e4ca0 --- /dev/null +++ b/include/uapi/linux/sctp.h @@ -0,0 +1,846 @@ +/* SCTP kernel implementation + * (C) Copyright IBM Corp. 2001, 2004 + * Copyright (c) 1999-2000 Cisco, Inc. + * Copyright (c) 1999-2001 Motorola, Inc. + * Copyright (c) 2002 Intel Corp. + * + * This file is part of the SCTP kernel implementation + * + * This header represents the structures and constants needed to support + * the SCTP Extension to the Sockets API. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers + * + * Or submit a bug report through the following website: + * http://www.sf.net/projects/lksctp + * + * Written or modified by: + * La Monte H.P. Yarroll + * R. Stewart + * K. Morneau + * Q. Xie + * Karl Knutson + * Jon Grimm + * Daisy Chang + * Ryan Layer + * Ardelle Fan + * Sridhar Samudrala + * Inaky Perez-Gonzalez + * Vlad Yasevich + * + * Any bugs reported given to us we will try to fix... any fixes shared will + * be incorporated into the next SCTP release. + */ + +#ifndef _UAPI_SCTP_H +#define _UAPI_SCTP_H + +#include +#include + +typedef __s32 sctp_assoc_t; + +/* The following symbols come from the Sockets API Extensions for + * SCTP . + */ +#define SCTP_RTOINFO 0 +#define SCTP_ASSOCINFO 1 +#define SCTP_INITMSG 2 +#define SCTP_NODELAY 3 /* Get/set nodelay option. */ +#define SCTP_AUTOCLOSE 4 +#define SCTP_SET_PEER_PRIMARY_ADDR 5 +#define SCTP_PRIMARY_ADDR 6 +#define SCTP_ADAPTATION_LAYER 7 +#define SCTP_DISABLE_FRAGMENTS 8 +#define SCTP_PEER_ADDR_PARAMS 9 +#define SCTP_DEFAULT_SEND_PARAM 10 +#define SCTP_EVENTS 11 +#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */ +#define SCTP_MAXSEG 13 /* Get/set maximum fragment. 
*/ +#define SCTP_STATUS 14 +#define SCTP_GET_PEER_ADDR_INFO 15 +#define SCTP_DELAYED_ACK_TIME 16 +#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME +#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME +#define SCTP_CONTEXT 17 +#define SCTP_FRAGMENT_INTERLEAVE 18 +#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */ +#define SCTP_MAX_BURST 20 /* Set/Get max burst */ +#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */ +#define SCTP_HMAC_IDENT 22 +#define SCTP_AUTH_KEY 23 +#define SCTP_AUTH_ACTIVE_KEY 24 +#define SCTP_AUTH_DELETE_KEY 25 +#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */ +#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */ +#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ +#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ +#define SCTP_AUTO_ASCONF 30 +#define SCTP_PEER_ADDR_THLDS 31 + +/* Internal Socket Options. Some of the sctp library functions are + * implemented using these socket options. + */ +#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */ +#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */ +#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */ +/* Options 104-106 are deprecated and removed. Do not use this space */ +#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */ +#define SCTP_GET_PEER_ADDRS 108 /* Get all peer address. */ +#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local address. */ +#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ +#define SCTP_GET_ASSOC_STATS 112 /* Read only */ + +/* + * 5.2.1 SCTP Initiation Structure (SCTP_INIT) + * + * This cmsghdr structure provides information for initializing new + * SCTP associations with sendmsg(). The SCTP_INITMSG socket option + * uses this same data structure. This structure is not used for + * recvmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ---------------------- + * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg + * + */ +struct sctp_initmsg { + __u16 sinit_num_ostreams; + __u16 sinit_max_instreams; + __u16 sinit_max_attempts; + __u16 sinit_max_init_timeo; +}; + +/* + * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) + * + * This cmsghdr structure specifies SCTP options for sendmsg() and + * describes SCTP header information about a received message through + * recvmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ---------------------- + * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo + * + */ +struct sctp_sndrcvinfo { + __u16 sinfo_stream; + __u16 sinfo_ssn; + __u16 sinfo_flags; + __u32 sinfo_ppid; + __u32 sinfo_context; + __u32 sinfo_timetolive; + __u32 sinfo_tsn; + __u32 sinfo_cumtsn; + sctp_assoc_t sinfo_assoc_id; +}; + +/* + * sinfo_flags: 16 bits (unsigned integer) + * + * This field may contain any of the following flags and is composed of + * a bitwise OR of these values. + */ + +enum sctp_sinfo_flags { + SCTP_UNORDERED = 1, /* Send/receive message unordered. */ + SCTP_ADDR_OVER = 2, /* Override the primary destination. */ + SCTP_ABORT=4, /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ + SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ +}; + +typedef union { + __u8 raw; + struct sctp_initmsg init; + struct sctp_sndrcvinfo sndrcv; +} sctp_cmsg_data_t; + +/* These are cmsg_types. 
*/ +typedef enum sctp_cmsg_type { + SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ +#define SCTP_INIT SCTP_INIT + SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ +#define SCTP_SNDRCV SCTP_SNDRCV +} sctp_cmsg_t; + +/* + * 5.3.1.1 SCTP_ASSOC_CHANGE + * + * Communication notifications inform the ULP that an SCTP association + * has either begun or ended. The identifier for a new association is + * provided by this notificaion. The notification information has the + * following format: + * + */ +struct sctp_assoc_change { + __u16 sac_type; + __u16 sac_flags; + __u32 sac_length; + __u16 sac_state; + __u16 sac_error; + __u16 sac_outbound_streams; + __u16 sac_inbound_streams; + sctp_assoc_t sac_assoc_id; + __u8 sac_info[0]; +}; + +/* + * sac_state: 32 bits (signed integer) + * + * This field holds one of a number of values that communicate the + * event that happened to the association. They include: + * + * Note: The following state names deviate from the API draft as + * the names clash too easily with other kernel symbols. + */ +enum sctp_sac_state { + SCTP_COMM_UP, + SCTP_COMM_LOST, + SCTP_RESTART, + SCTP_SHUTDOWN_COMP, + SCTP_CANT_STR_ASSOC, +}; + +/* + * 5.3.1.2 SCTP_PEER_ADDR_CHANGE + * + * When a destination address on a multi-homed peer encounters a change + * an interface details event is sent. The information has the + * following structure: + */ +struct sctp_paddr_change { + __u16 spc_type; + __u16 spc_flags; + __u32 spc_length; + struct sockaddr_storage spc_aaddr; + int spc_state; + int spc_error; + sctp_assoc_t spc_assoc_id; +} __attribute__((packed, aligned(4))); + +/* + * spc_state: 32 bits (signed integer) + * + * This field holds one of a number of values that communicate the + * event that happened to the address. They include: + */ +enum sctp_spc_state { + SCTP_ADDR_AVAILABLE, + SCTP_ADDR_UNREACHABLE, + SCTP_ADDR_REMOVED, + SCTP_ADDR_ADDED, + SCTP_ADDR_MADE_PRIM, + SCTP_ADDR_CONFIRMED, +}; + + +/* + * 5.3.1.3 SCTP_REMOTE_ERROR + * + * A remote peer may send an Operational Error message to its peer. + * This message indicates a variety of error conditions on an + * association. The entire error TLV as it appears on the wire is + * included in a SCTP_REMOTE_ERROR event. Please refer to the SCTP + * specification [SCTP] and any extensions for a list of possible + * error formats. SCTP error TLVs have the format: + */ +struct sctp_remote_error { + __u16 sre_type; + __u16 sre_flags; + __u32 sre_length; + __u16 sre_error; + sctp_assoc_t sre_assoc_id; + __u8 sre_data[0]; +}; + + +/* + * 5.3.1.4 SCTP_SEND_FAILED + * + * If SCTP cannot deliver a message it may return the message as a + * notification. + */ +struct sctp_send_failed { + __u16 ssf_type; + __u16 ssf_flags; + __u32 ssf_length; + __u32 ssf_error; + struct sctp_sndrcvinfo ssf_info; + sctp_assoc_t ssf_assoc_id; + __u8 ssf_data[0]; +}; + +/* + * ssf_flags: 16 bits (unsigned integer) + * + * The flag value will take one of the following values + * + * SCTP_DATA_UNSENT - Indicates that the data was never put on + * the wire. + * + * SCTP_DATA_SENT - Indicates that the data was put on the wire. + * Note that this does not necessarily mean that the + * data was (or was not) successfully delivered. + */ +enum sctp_ssf_flags { + SCTP_DATA_UNSENT, + SCTP_DATA_SENT, +}; + +/* + * 5.3.1.5 SCTP_SHUTDOWN_EVENT + * + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. 
+ */ +struct sctp_shutdown_event { + __u16 sse_type; + __u16 sse_flags; + __u32 sse_length; + sctp_assoc_t sse_assoc_id; +}; + +/* + * 5.3.1.6 SCTP_ADAPTATION_INDICATION + * + * When a peer sends a Adaptation Layer Indication parameter , SCTP + * delivers this notification to inform the application + * that of the peers requested adaptation layer. + */ +struct sctp_adaptation_event { + __u16 sai_type; + __u16 sai_flags; + __u32 sai_length; + __u32 sai_adaptation_ind; + sctp_assoc_t sai_assoc_id; +}; + +/* + * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT + * + * When a receiver is engaged in a partial delivery of a + * message this notification will be used to indicate + * various events. + */ +struct sctp_pdapi_event { + __u16 pdapi_type; + __u16 pdapi_flags; + __u32 pdapi_length; + __u32 pdapi_indication; + sctp_assoc_t pdapi_assoc_id; +}; + +enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; + +/* + * 5.3.1.8. SCTP_AUTHENTICATION_EVENT + * + * When a receiver is using authentication this message will provide + * notifications regarding new keys being made active as well as errors. + */ +struct sctp_authkey_event { + __u16 auth_type; + __u16 auth_flags; + __u32 auth_length; + __u16 auth_keynumber; + __u16 auth_altkeynumber; + __u32 auth_indication; + sctp_assoc_t auth_assoc_id; +}; + +enum { SCTP_AUTH_NEWKEY = 0, }; + +/* + * 6.1.9. SCTP_SENDER_DRY_EVENT + * + * When the SCTP stack has no more user data to send or retransmit, this + * notification is given to the user. Also, at the time when a user app + * subscribes to this event, if there is no data to be sent or + * retransmit, the stack will immediately send up this notification. + */ +struct sctp_sender_dry_event { + __u16 sender_dry_type; + __u16 sender_dry_flags; + __u32 sender_dry_length; + sctp_assoc_t sender_dry_assoc_id; +}; + +/* + * Described in Section 7.3 + * Ancillary Data and Notification Interest Options + */ +struct sctp_event_subscribe { + __u8 sctp_data_io_event; + __u8 sctp_association_event; + __u8 sctp_address_event; + __u8 sctp_send_failure_event; + __u8 sctp_peer_error_event; + __u8 sctp_shutdown_event; + __u8 sctp_partial_delivery_event; + __u8 sctp_adaptation_layer_event; + __u8 sctp_authentication_event; + __u8 sctp_sender_dry_event; +}; + +/* + * 5.3.1 SCTP Notification Structure + * + * The notification structure is defined as the union of all + * notification types. + * + */ +union sctp_notification { + struct { + __u16 sn_type; /* Notification type. */ + __u16 sn_flags; + __u32 sn_length; + } sn_header; + struct sctp_assoc_change sn_assoc_change; + struct sctp_paddr_change sn_paddr_change; + struct sctp_remote_error sn_remote_error; + struct sctp_send_failed sn_send_failed; + struct sctp_shutdown_event sn_shutdown_event; + struct sctp_adaptation_event sn_adaptation_event; + struct sctp_pdapi_event sn_pdapi_event; + struct sctp_authkey_event sn_authkey_event; + struct sctp_sender_dry_event sn_sender_dry_event; +}; + +/* Section 5.3.1 + * All standard values for sn_type flags are greater than 2^15. + * Values from 2^15 and down are reserved. 
+ */ + +enum sctp_sn_type { + SCTP_SN_TYPE_BASE = (1<<15), + SCTP_ASSOC_CHANGE, +#define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE + SCTP_PEER_ADDR_CHANGE, +#define SCTP_PEER_ADDR_CHANGE SCTP_PEER_ADDR_CHANGE + SCTP_SEND_FAILED, +#define SCTP_SEND_FAILED SCTP_SEND_FAILED + SCTP_REMOTE_ERROR, +#define SCTP_REMOTE_ERROR SCTP_REMOTE_ERROR + SCTP_SHUTDOWN_EVENT, +#define SCTP_SHUTDOWN_EVENT SCTP_SHUTDOWN_EVENT + SCTP_PARTIAL_DELIVERY_EVENT, +#define SCTP_PARTIAL_DELIVERY_EVENT SCTP_PARTIAL_DELIVERY_EVENT + SCTP_ADAPTATION_INDICATION, +#define SCTP_ADAPTATION_INDICATION SCTP_ADAPTATION_INDICATION + SCTP_AUTHENTICATION_EVENT, +#define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT + SCTP_SENDER_DRY_EVENT, +#define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT +}; + +/* Notification error codes used to fill up the error fields in some + * notifications. + * SCTP_PEER_ADDRESS_CHAGE : spc_error + * SCTP_ASSOC_CHANGE : sac_error + * These names should be potentially included in the draft 04 of the SCTP + * sockets API specification. + */ +typedef enum sctp_sn_error { + SCTP_FAILED_THRESHOLD, + SCTP_RECEIVED_SACK, + SCTP_HEARTBEAT_SUCCESS, + SCTP_RESPONSE_TO_USER_REQ, + SCTP_INTERNAL_ERROR, + SCTP_SHUTDOWN_GUARD_EXPIRES, + SCTP_PEER_FAULTY, +} sctp_sn_error_t; + +/* + * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO) + * + * The protocol parameters used to initialize and bound retransmission + * timeout (RTO) are tunable. See [SCTP] for more information on how + * these parameters are used in RTO calculation. + */ +struct sctp_rtoinfo { + sctp_assoc_t srto_assoc_id; + __u32 srto_initial; + __u32 srto_max; + __u32 srto_min; +}; + +/* + * 7.1.2 Association Parameters (SCTP_ASSOCINFO) + * + * This option is used to both examine and set various association and + * endpoint parameters. + */ +struct sctp_assocparams { + sctp_assoc_t sasoc_assoc_id; + __u16 sasoc_asocmaxrxt; + __u16 sasoc_number_peer_destinations; + __u32 sasoc_peer_rwnd; + __u32 sasoc_local_rwnd; + __u32 sasoc_cookie_life; +}; + +/* + * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) + * + * Requests that the peer mark the enclosed address as the association + * primary. The enclosed address must be one of the association's + * locally bound addresses. The following structure is used to make a + * set primary request: + */ +struct sctp_setpeerprim { + sctp_assoc_t sspp_assoc_id; + struct sockaddr_storage sspp_addr; +} __attribute__((packed, aligned(4))); + +/* + * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) + * + * Requests that the local SCTP stack use the enclosed peer address as + * the association primary. The enclosed address must be one of the + * association peer's addresses. The following structure is used to + * make a set peer primary request: + */ +struct sctp_prim { + sctp_assoc_t ssp_assoc_id; + struct sockaddr_storage ssp_addr; +} __attribute__((packed, aligned(4))); + +/* For backward compatibility use, define the old name too */ +#define sctp_setprim sctp_prim + +/* + * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) + * + * Requests that the local endpoint set the specified Adaptation Layer + * Indication parameter for all future INIT and INIT-ACK exchanges. 
+ */ +struct sctp_setadaptation { + __u32 ssb_adaptation_ind; +}; + +/* + * 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) + * + * Applications can enable or disable heartbeats for any peer address + * of an association, modify an address's heartbeat interval, force a + * heartbeat to be sent immediately, and adjust the address's maximum + * number of retransmissions sent before an address is considered + * unreachable. The following structure is used to access and modify an + * address's parameters: + */ +enum sctp_spp_flags { + SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/ + SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/ + SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, + SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/ + SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/ + SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/ + SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, + SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/ + SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, + SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */ +}; + +struct sctp_paddrparams { + sctp_assoc_t spp_assoc_id; + struct sockaddr_storage spp_address; + __u32 spp_hbinterval; + __u16 spp_pathmaxrxt; + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; +} __attribute__((packed, aligned(4))); + +/* + * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) + * + * This set option adds a chunk type that the user is requesting to be + * received only in an authenticated way. Changes to the list of chunks + * will only effect future associations on the socket. + */ +struct sctp_authchunk { + __u8 sauth_chunk; +}; + +/* + * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) + * + * This option gets or sets the list of HMAC algorithms that the local + * endpoint requires the peer to use. + */ +#ifndef __KERNEL__ +/* This here is only used by user space as is. It might not be a good idea + * to export/reveal the whole structure with reserved fields etc. + */ +enum { + SCTP_AUTH_HMAC_ID_SHA1 = 1, + SCTP_AUTH_HMAC_ID_SHA256 = 3, +}; +#endif + +struct sctp_hmacalgo { + __u32 shmac_num_idents; + __u16 shmac_idents[]; +}; + +/* Sadly, user and kernel space have different names for + * this structure member, so this is to not break anything. + */ +#define shmac_number_of_idents shmac_num_idents + +/* + * 7.1.20. Set a shared key (SCTP_AUTH_KEY) + * + * This option will set a shared secret key which is used to build an + * association shared key. + */ +struct sctp_authkey { + sctp_assoc_t sca_assoc_id; + __u16 sca_keynumber; + __u16 sca_keylength; + __u8 sca_key[]; +}; + +/* + * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) + * + * This option will get or set the active shared key to be used to build + * the association shared key. + */ + +struct sctp_authkeyid { + sctp_assoc_t scact_assoc_id; + __u16 scact_keynumber; +}; + + +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. 
If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + */ +struct sctp_sack_info { + sctp_assoc_t sack_assoc_id; + uint32_t sack_delay; + uint32_t sack_freq; +}; + +struct sctp_assoc_value { + sctp_assoc_t assoc_id; + uint32_t assoc_value; +}; + +/* + * 7.2.2 Peer Address Information + * + * Applications can retrieve information about a specific peer address + * of an association, including its reachability state, congestion + * window, and retransmission timer values. This information is + * read-only. The following structure is used to access this + * information: + */ +struct sctp_paddrinfo { + sctp_assoc_t spinfo_assoc_id; + struct sockaddr_storage spinfo_address; + __s32 spinfo_state; + __u32 spinfo_cwnd; + __u32 spinfo_srtt; + __u32 spinfo_rto; + __u32 spinfo_mtu; +} __attribute__((packed, aligned(4))); + +/* Peer addresses's state. */ +/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x] + * calls. + * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters. + * Not yet confirmed by a heartbeat and not available for data + * transfers. + * ACTIVE : Peer address confirmed, active and available for data transfers. + * INACTIVE: Peer address inactive and not available for data transfers. + */ +enum sctp_spinfo_state { + SCTP_INACTIVE, + SCTP_PF, + SCTP_ACTIVE, + SCTP_UNCONFIRMED, + SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ +}; + +/* + * 7.2.1 Association Status (SCTP_STATUS) + * + * Applications can retrieve current status information about an + * association, including association state, peer receiver window size, + * number of unacked data chunks, and number of data chunks pending + * receipt. This information is read-only. The following structure is + * used to access this information: + */ +struct sctp_status { + sctp_assoc_t sstat_assoc_id; + __s32 sstat_state; + __u32 sstat_rwnd; + __u16 sstat_unackdata; + __u16 sstat_penddata; + __u16 sstat_instrms; + __u16 sstat_outstrms; + __u32 sstat_fragmentation_point; + struct sctp_paddrinfo sstat_primary; +}; + +/* + * 7.2.3. Get the list of chunks the peer requires to be authenticated + * (SCTP_PEER_AUTH_CHUNKS) + * + * This option gets a list of chunks for a specified association that + * the peer requires to be received authenticated only. + */ +struct sctp_authchunks { + sctp_assoc_t gauth_assoc_id; + __u32 gauth_number_of_chunks; + uint8_t gauth_chunks[]; +}; + +/* The broken spelling has been released already in lksctp-tools header, + * so don't break anyone, now that it's fixed. + */ +#define guth_number_of_chunks gauth_number_of_chunks + +/* Association states. */ +enum sctp_sstat_state { + SCTP_EMPTY = 0, + SCTP_CLOSED = 1, + SCTP_COOKIE_WAIT = 2, + SCTP_COOKIE_ECHOED = 3, + SCTP_ESTABLISHED = 4, + SCTP_SHUTDOWN_PENDING = 5, + SCTP_SHUTDOWN_SENT = 6, + SCTP_SHUTDOWN_RECEIVED = 7, + SCTP_SHUTDOWN_ACK_SENT = 8, +}; + +/* + * 8.2.6. Get the Current Identifiers of Associations + * (SCTP_GET_ASSOC_ID_LIST) + * + * This option gets the current list of SCTP association identifiers of + * the SCTP associations handled by a one-to-many style socket. + */ +struct sctp_assoc_ids { + __u32 gaids_number_of_ids; + sctp_assoc_t gaids_assoc_id[]; +}; + +/* + * 8.3, 8.5 get all peer/local addresses in an association. 
+ * This parameter struct is used by SCTP_GET_PEER_ADDRS and + * SCTP_GET_LOCAL_ADDRS socket options used internally to implement + * sctp_getpaddrs() and sctp_getladdrs() API. + */ +struct sctp_getaddrs_old { + sctp_assoc_t assoc_id; + int addr_num; +#ifdef __KERNEL__ + struct sockaddr __user *addrs; +#else + struct sockaddr *addrs; +#endif +}; + +struct sctp_getaddrs { + sctp_assoc_t assoc_id; /*input*/ + __u32 addr_num; /*output*/ + __u8 addrs[0]; /*output, variable size*/ +}; + +/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves + * association stats. All stats are counts except sas_maxrto and + * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since + * the last call. Will return 0 when RTO was not update since last call + */ +struct sctp_assoc_stats { + sctp_assoc_t sas_assoc_id; /* Input */ + /* Transport of observed max RTO */ + struct sockaddr_storage sas_obs_rto_ipaddr; + __u64 sas_maxrto; /* Maximum Observed RTO for period */ + __u64 sas_isacks; /* SACKs received */ + __u64 sas_osacks; /* SACKs sent */ + __u64 sas_opackets; /* Packets sent */ + __u64 sas_ipackets; /* Packets received */ + __u64 sas_rtxchunks; /* Retransmitted Chunks */ + __u64 sas_outofseqtsns;/* TSN received > next expected */ + __u64 sas_idupchunks; /* Dups received (ordered+unordered) */ + __u64 sas_gapcnt; /* Gap Acknowledgements Received */ + __u64 sas_ouodchunks; /* Unordered data chunks sent */ + __u64 sas_iuodchunks; /* Unordered data chunks received */ + __u64 sas_oodchunks; /* Ordered data chunks sent */ + __u64 sas_iodchunks; /* Ordered data chunks received */ + __u64 sas_octrlchunks; /* Control chunks sent */ + __u64 sas_ictrlchunks; /* Control chunks received */ +}; + +/* These are bit fields for msghdr->msg_flags. See section 5.1. */ +/* On user space Linux, these live in as an enum. */ +enum sctp_msg_flags { + MSG_NOTIFICATION = 0x8000, +#define MSG_NOTIFICATION MSG_NOTIFICATION +}; + +/* + * 8.1 sctp_bindx() + * + * The flags parameter is formed from the bitwise OR of zero or more of the + * following currently defined flags: + */ +#define SCTP_BINDX_ADD_ADDR 0x01 +#define SCTP_BINDX_REM_ADDR 0x02 + +/* This is the structure that is passed as an argument(optval) to + * getsockopt(SCTP_SOCKOPT_PEELOFF). + */ +typedef struct { + sctp_assoc_t associd; + int sd; +} sctp_peeloff_arg_t; + +/* + * Peer Address Thresholds socket option + */ +struct sctp_paddrthlds { + sctp_assoc_t spt_assoc_id; + struct sockaddr_storage spt_address; + __u16 spt_pathmaxrxt; + __u16 spt_pathpfthld; +}; + +#endif /* _UAPI_SCTP_H */ -- cgit v1.2.3-70-g09d2 From 0ca54f6c5fd4ce58aa044d1fc7f00d7f6cf2801c Mon Sep 17 00:00:00 2001 From: Marek Puzyniak Date: Wed, 10 Apr 2013 13:19:13 +0200 Subject: mac80211: provide SSID in IBSS mode Some drivers need SSID in AP and IBSS mode. AP SSID is provided through BSS_CHANGED_SSID notification. There was no easy way to do the same for IBSS. In IBSS mode SSID is known but was not stored in BSS configuration. Extend the AP-mode functionality to also work in IBSS mode. Signed-off-by: Marek Puzyniak Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- net/mac80211/ibss.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 64faf015dd1..0dde213dd3b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -209,7 +209,7 @@ struct ieee80211_chanctx_conf { * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. 
Note * that it is only ever disabled for station mode. * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. - * @BSS_CHANGED_SSID: SSID changed for this BSS (AP mode) + * @BSS_CHANGED_SSID: SSID changed for this BSS (AP and IBSS mode) * @BSS_CHANGED_AP_PROBE_RESP: Probe Response changed for this BSS (AP mode) * @BSS_CHANGED_PS: PS changed for this BSS (STA mode) * @BSS_CHANGED_TXPOWER: TX power setting changed for this interface @@ -326,7 +326,7 @@ enum ieee80211_rssi_event { * your driver/device needs to do. * @ps: power-save mode (STA only). This flag is NOT affected by * offchannel/dynamic_ps operations. - * @ssid: The SSID of the current vif. Only valid in AP-mode. + * @ssid: The SSID of the current vif. Valid in AP and IBSS mode. * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 2a0b2186d98..b7bf6d76f1d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -209,6 +209,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; + sdata->vif.bss_conf.ssid_len = ifibss->ssid_len; + memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len); bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; @@ -217,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_HT; bss_change |= BSS_CHANGED_IBSS; + bss_change |= BSS_CHANGED_SSID; /* * In 5 GHz/802.11a, we can always use short slot time. @@ -1159,6 +1162,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; sdata->vif.bss_conf.enable_beacon = false; + sdata->vif.bss_conf.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); -- cgit v1.2.3-70-g09d2 From 064f370c5fd982e1264c03f5b704e00f5e41eb36 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 2 Apr 2013 10:25:16 +0200 Subject: NFC: llcp: Add support in getsockopt for RW, LTO, and MIU remote parameters Useful for LLCP validation tests. 
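As an illustration only (not part of this patch), here is a minimal user-space sketch of reading the three new remote parameters. It assumes an already-connected LLCP socket fd, that the options are queried at the SOL_NFC level like the existing NFC_LLCP_RW/NFC_LLCP_MIUX options, and that each value is returned as a 32-bit integer; the helper name and the SOL_NFC fallback definition are editorial assumptions.

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/nfc.h>

#ifndef SOL_NFC
#define SOL_NFC 280	/* assumed value if the libc headers do not provide it */
#endif

/* Print the remote link parameters exposed by this patch for a connected
 * LLCP socket.
 */
static void print_remote_llcp_params(int fd)
{
	uint32_t miu = 0, lto = 0, rw = 0;
	socklen_t len = sizeof(uint32_t);

	if (getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_MIU, &miu, &len) == 0)
		printf("remote MIU: %u\n", (unsigned int)miu);
	if (getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_LTO, &lto, &len) == 0)
		printf("remote LTO: %u\n", (unsigned int)lto);	/* kernel returns remote_lto / 10 */
	if (getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_RW, &rw, &len) == 0)
		printf("remote RW: %u\n", (unsigned int)rw);
}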
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 7 +++++-- net/nfc/llcp/sock.c | 23 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 7440bc81a04..7c6f627a717 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -233,7 +233,10 @@ struct sockaddr_nfc_llcp { #define NFC_LLCP_DIRECTION_TX 0x01 /* socket option names */ -#define NFC_LLCP_RW 0 -#define NFC_LLCP_MIUX 1 +#define NFC_LLCP_RW 0 +#define NFC_LLCP_MIUX 1 +#define NFC_LLCP_REMOTE_MIU 2 +#define NFC_LLCP_REMOTE_LTO 3 +#define NFC_LLCP_REMOTE_RW 4 #endif /*__LINUX_NFC_H */ diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 641c535be3d..fd01ac6e0bf 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -303,7 +303,7 @@ static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); int len, err = 0; - u16 miux; + u16 miux, remote_miu; u8 rw; pr_debug("%p optname %d\n", sk, optname); @@ -339,6 +339,27 @@ static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, break; + case NFC_LLCP_REMOTE_MIU: + remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ? + local->remote_miu : llcp_sock->remote_miu; + + if (put_user(remote_miu, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_REMOTE_LTO: + if (put_user(local->remote_lto / 10, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_REMOTE_RW: + if (put_user(llcp_sock->remote_rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + default: err = -ENOPROTOOPT; break; -- cgit v1.2.3-70-g09d2 From 76a68ba0ae097be72dfa8f918b3139130da769a4 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:37 +0200 Subject: Bluetooth: rename hci_conn_put to hci_conn_drop We use _get() and _put() for device ref-counting in the kernel. However, hci_conn_put() is _not_ used for ref-counting, hence, rename it to hci_conn_drop() so we can later fix ref-counting and introduce hci_conn_put(). hci_conn_hold() and hci_conn_put() are currently used to manage how long a connection should be held alive. When the last user drops the connection, we spawn a delayed work that performs the disconnect. Obviously, this has nothing to do with ref-counting for the _object_ but rather for the keep-alive of the connection. But we really _need_ proper ref-counting for the _object_ to allow connection-users like rfcomm-tty, HIDP or others. 
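Purely as an editorial sketch of the keep-alive pattern described above (not taken from the patch): a connection user holds the connection while it needs the link and releases it with hci_conn_drop(); once the last user drops it, the delayed disconnect work can run. Everything here except hci_conn_hold() and hci_conn_drop() is a hypothetical placeholder.

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

/* Hypothetical stand-in for whatever work a connection user (a profile,
 * rfcomm-tty, HIDP, ...) would perform over the link.
 */
static int example_do_io(struct hci_conn *conn)
{
	return 0;
}

static int example_use_link(struct hci_conn *conn)
{
	int err;

	hci_conn_hold(conn);		/* keep the link alive while it is in use */
	err = example_do_io(conn);
	hci_conn_drop(conn);		/* last drop arms the delayed disconnect */

	return err;
}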
Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 6 +++--- net/bluetooth/hci_event.c | 36 ++++++++++++++++++------------------ net/bluetooth/l2cap_core.c | 6 +++--- net/bluetooth/mgmt.c | 6 +++--- net/bluetooth/sco.c | 6 +++--- net/bluetooth/smp.c | 2 +- 7 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d4e13bf5ae5..78ea9c7c202 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -612,7 +612,7 @@ static inline void hci_conn_hold(struct hci_conn *conn) cancel_delayed_work(&conn->disc_work); } -static inline void hci_conn_put(struct hci_conn *conn) +static inline void hci_conn_drop(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b9f90169940..30d7dfc2300 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -433,7 +433,7 @@ int hci_conn_del(struct hci_conn *conn) struct hci_conn *acl = conn->link; if (acl) { acl->link = NULL; - hci_conn_put(acl); + hci_conn_drop(acl); } } @@ -565,7 +565,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, if (!sco) { sco = hci_conn_add(hdev, type, dst); if (!sco) { - hci_conn_put(acl); + hci_conn_drop(acl); return ERR_PTR(-ENOMEM); } } @@ -980,7 +980,7 @@ void hci_chan_del(struct hci_chan *chan) synchronize_rcu(); - hci_conn_put(conn); + hci_conn_drop(conn); skb_queue_purge(&chan->data_q); kfree(chan); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0a2b128d2cc..2cf28b198b3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1190,7 +1190,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1217,7 +1217,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1379,7 +1379,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1406,7 +1406,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) if (conn) { if (conn->state == BT_CONFIG) { hci_proto_connect_cfm(conn, status); - hci_conn_put(conn); + hci_conn_drop(conn); } } @@ -1860,7 +1860,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECT2; hci_proto_connect_cfm(conn, 0); - hci_conn_put(conn); + hci_conn_drop(conn); } } else { /* Connection rejected */ @@ -1967,14 +1967,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { @@ -2058,7 +2058,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->status && conn->state == 
BT_CONNECTED) { hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } @@ -2067,7 +2067,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else hci_encrypt_cfm(conn, ev->status, ev->encrypt); } @@ -2142,7 +2142,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -2682,7 +2682,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->state == BT_CONNECTED) { hci_conn_hold(conn); conn->disc_timeout = HCI_PAIRING_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) @@ -2785,7 +2785,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->key_type != HCI_LK_CHANGED_COMBINATION) conn->key_type = ev->key_type; - hci_conn_put(conn); + hci_conn_drop(conn); } if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) @@ -2954,7 +2954,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3087,7 +3087,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (ev->status && conn->state == BT_CONNECTED) { hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } @@ -3096,13 +3096,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); } else { hci_auth_cfm(conn, ev->status); hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(conn); + hci_conn_drop(conn); } unlock: @@ -3363,7 +3363,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); - hci_conn_put(conn); + hci_conn_drop(conn); unlock: hci_dev_unlock(hdev); @@ -3451,7 +3451,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hci_conn_hold(hcon); hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_put(hcon); + hci_conn_drop(hcon); hci_conn_hold_device(hcon); hci_conn_add_sysfs(hcon); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7c7e9321f1e..7cdb93c21b3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) chan->conn = NULL; if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); if (mgr && mgr->bredr_chan == chan) mgr->bredr_chan = NULL; @@ -1697,7 +1697,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, conn = l2cap_conn_add(hcon, 0); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -1707,7 +1707,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (!list_empty(&conn->chan_l)) { err = -EBUSY; - hci_conn_put(hcon); + hci_conn_drop(hcon); } if (err) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 03e7e732215..34ba1647e6e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2131,7 +2131,7 @@ 
static void pairing_complete(struct pending_cmd *cmd, u8 status) conn->security_cfm_cb = NULL; conn->disconn_cfm_cb = NULL; - hci_conn_put(conn); + hci_conn_drop(conn); mgmt_pending_remove(cmd); } @@ -2222,7 +2222,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, } if (conn->connect_cfm_cb) { - hci_conn_put(conn); + hci_conn_drop(conn); err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto unlock; @@ -2231,7 +2231,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len); if (!cmd) { err = -ENOMEM; - hci_conn_put(conn); + hci_conn_drop(conn); goto unlock; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d919d1161ab..9909eec6afe 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk) conn = sco_conn_add(hcon); if (!conn) { - hci_conn_put(hcon); + hci_conn_drop(hcon); err = -ENOMEM; goto done; } @@ -353,7 +353,7 @@ static void __sco_sock_close(struct sock *sk) if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - hci_conn_put(sco_pi(sk)->conn->hcon); + hci_conn_drop(sco_pi(sk)->conn->hcon); sco_pi(sk)->conn->hcon = NULL; } else sco_chan_del(sk, ECONNRESET); @@ -882,7 +882,7 @@ static void sco_chan_del(struct sock *sk, int err) sco_conn_unlock(conn); if (conn->hcon) - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } sk->sk_state = BT_CLOSED; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5abefb12891..b2296d3857a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn) kfree(smp); conn->smp_chan = NULL; conn->hcon->smp_conn = NULL; - hci_conn_put(conn->hcon); + hci_conn_drop(conn->hcon); } int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) -- cgit v1.2.3-70-g09d2 From ac64995da872c8ae6f74a5556fdad565829985b0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 11 Apr 2013 04:40:30 +0000 Subject: usbnet: introduce usbnet_link_change API This patch introduces the API of usbnet_link_change, so that usbnet can handle link change centrally, which may help to implement killing traffic URBs for saving USB bus bandwidth and host controller power. Signed-off-by: Ming Lei Signed-off-by: David S. 
Miller --- drivers/net/usb/usbnet.c | 13 +++++++++++++ include/linux/usb/usbnet.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 51f3192f393..40e4237b0a3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1653,6 +1653,19 @@ int usbnet_manage_power(struct usbnet *dev, int on) } EXPORT_SYMBOL(usbnet_manage_power); +void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset) +{ + /* update link after link is reseted */ + if (link && !need_reset) + netif_carrier_on(dev->net); + else + netif_carrier_off(dev->net); + + if (need_reset && link) + usbnet_defer_kevent(dev, EVENT_LINK_RESET); +} +EXPORT_SYMBOL(usbnet_link_change); + /*-------------------------------------------------------------------------*/ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e5ac93bab1..eb021b8f53b 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -245,5 +245,6 @@ extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); extern int usbnet_nway_reset(struct net_device *net); extern int usbnet_manage_power(struct usbnet *, int); +extern void usbnet_link_change(struct usbnet *, bool, bool); #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3-70-g09d2 From 4b49f58fff00e6e9b24eaa31d4c6324393d76b0a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 11 Apr 2013 04:40:40 +0000 Subject: usbnet: handle link change The link change is detected via the interrupt pipe, and bulk pipes are responsible for transferring packets, so it is reasonable to stop bulk transfer after the link is reported as off. Two advantages may be obtained by stopping bulk transfer once the link is off: - USB bus bandwidth is saved (the USB bus is a shared bus except for USB 3.0); for example, lots of 'IN' token packets and 'NYET' handshake packets are transferred on a 2.0 bus. - power will probably be saved for the USB host controller, since cancelling bulk transfer may disable the host controller's asynchronous schedule. With this patch, when the link goes off, a performance boost of about 10% can be seen on bulk transfer of another USB device attached to the same bus as the usbnet device; see the test below on next-20130410: - read from USB mass storage (Sandisk Extreme USB 3.0) on a pandaboard with the command below after unplugging the ethernet cable: dd if=/dev/sda iflag=direct of=/dev/null bs=1M count=800 - without the patch 1, 838860800 bytes (839 MB) copied, 36.2216 s, 23.2 MB/s 2, 838860800 bytes (839 MB) copied, 35.8368 s, 23.4 MB/s 3, 838860800 bytes (839 MB) copied, 35.823 s, 23.4 MB/s 4, 838860800 bytes (839 MB) copied, 35.937 s, 23.3 MB/s 5, 838860800 bytes (839 MB) copied, 35.7365 s, 23.5 MB/s average: 23.6MB/s - with the patch 1, 838860800 bytes (839 MB) copied, 32.3817 s, 25.9 MB/s 2, 838860800 bytes (839 MB) copied, 31.7389 s, 26.4 MB/s 3, 838860800 bytes (839 MB) copied, 32.438 s, 25.9 MB/s 4, 838860800 bytes (839 MB) copied, 32.5492 s, 25.8 MB/s 5, 838860800 bytes (839 MB) copied, 31.6178 s, 26.5 MB/s average: 26.1MB/s Signed-off-by: Ming Lei Signed-off-by: David S.
Miller --- drivers/net/usb/usbnet.c | 30 ++++++++++++++++++++++++++++++ include/linux/usb/usbnet.h | 1 + 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 34e425264a7..1e5a9b72650 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -938,6 +938,27 @@ static const struct ethtool_ops usbnet_ethtool_ops = { /*-------------------------------------------------------------------------*/ +static void __handle_link_change(struct usbnet *dev) +{ + if (!test_bit(EVENT_DEV_OPEN, &dev->flags)) + return; + + if (!netif_carrier_ok(dev->net)) { + /* kill URBs for reading packets to save bus bandwidth */ + unlink_urbs(dev, &dev->rxq); + + /* + * tx_timeout will unlink URBs for sending packets and + * tx queue is stopped by netcore after link becomes off + */ + } else { + /* submitting URBs for reading packets */ + tasklet_schedule(&dev->bh); + } + + clear_bit(EVENT_LINK_CHANGE, &dev->flags); +} + /* work that cannot be done in interrupt context uses keventd. * * NOTE: with 2.5 we could do more of this using completion callbacks, @@ -1035,8 +1056,14 @@ skip_reset: } else { usb_autopm_put_interface(dev->intf); } + + /* handle link change from link resetting */ + __handle_link_change(dev); } + if (test_bit (EVENT_LINK_CHANGE, &dev->flags)) + __handle_link_change(dev); + if (dev->flags) netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags); } @@ -1286,6 +1313,7 @@ static void usbnet_bh (unsigned long param) // or are we maybe short a few urbs? } else if (netif_running (dev->net) && netif_device_present (dev->net) && + netif_carrier_ok(dev->net) && !timer_pending (&dev->delay) && !test_bit (EVENT_RX_HALT, &dev->flags)) { int temp = dev->rxq.qlen; @@ -1663,6 +1691,8 @@ void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset) if (need_reset && link) usbnet_defer_kevent(dev, EVENT_LINK_RESET); + else + usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); } EXPORT_SYMBOL(usbnet_link_change); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index eb021b8f53b..da46327fca1 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -72,6 +72,7 @@ struct usbnet { # define EVENT_DEVICE_REPORT_IDLE 8 # define EVENT_NO_RUNTIME_PM 9 # define EVENT_RX_KILL 10 +# define EVENT_LINK_CHANGE 11 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3-70-g09d2 From 44b3decb414919760c7327df05e63372c1bf5d9a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 11 Apr 2013 11:51:36 +0200 Subject: rfkill: Add NFC to the list of supported radios And return the proper string for it. Acked-by: Johannes Berg Acked-by: Marcel Holtmann Signed-off-by: Samuel Ortiz --- include/uapi/linux/rfkill.h | 2 ++ net/rfkill/core.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 2753c6cc974..058757f7a73 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -37,6 +37,7 @@ * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device. * @RFKILL_TYPE_GPS: switch is on a GPS device. * @RFKILL_TYPE_FM: switch is on a FM radio device. + * @RFKILL_TYPE_NFC: switch is on an NFC device. 
* @NUM_RFKILL_TYPES: number of defined rfkill types */ enum rfkill_type { @@ -48,6 +49,7 @@ enum rfkill_type { RFKILL_TYPE_WWAN, RFKILL_TYPE_GPS, RFKILL_TYPE_FM, + RFKILL_TYPE_NFC, NUM_RFKILL_TYPES, }; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 9b9be5279f5..1cec5e4f3a5 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev, static const char *rfkill_get_type_str(enum rfkill_type type) { - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1); switch (type) { case RFKILL_TYPE_WLAN: @@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "gps"; case RFKILL_TYPE_FM: return "fm"; + case RFKILL_TYPE_NFC: + return "nfc"; default: BUG(); } -- cgit v1.2.3-70-g09d2 From be055b2f89b5842f41363b5655a33dffb51a8294 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 11 Apr 2013 11:52:20 +0200 Subject: NFC: RFKILL support All NFC devices will now get proper RFKILL support as long as they provide some dev_up and dev_down hooks. Rfkilling an NFC device will bring it down while it is left to userspace to bring it back up when being rfkill unblocked. This is very similar to what Bluetooth does. Acked-by: Marcel Holtmann Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 2 ++ net/nfc/core.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 87a6417fc93..5eb80bb3cbb 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -122,6 +122,8 @@ struct nfc_dev { bool shutting_down; + struct rfkill *rfkill; + struct nfc_ops *ops; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) diff --git a/net/nfc/core.c b/net/nfc/core.c index c571ca9a960..40d2527693d 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,11 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; + goto error; + } + if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; @@ -117,6 +123,24 @@ error: return rc; } +static int nfc_rfkill_set_block(void *data, bool blocked) +{ + struct nfc_dev *dev = data; + + pr_debug("%s blocked %d", dev_name(&dev->dev), blocked); + + if (!blocked) + return 0; + + nfc_dev_down(dev); + + return 0; +} + +static const struct rfkill_ops nfc_rfkill_ops = { + .set_block = nfc_rfkill_set_block, +}; + /** * nfc_start_poll - start polling for nfc targets * @@ -840,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("The userspace won't be notified that the device %s was added\n", dev_name(&dev->dev)); + dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, + RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); + if (dev->rfkill) { + if (rfkill_register(dev->rfkill) < 0) { + rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; + } + } + return 0; } EXPORT_SYMBOL(nfc_register_device); @@ -857,6 +890,11 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; + if (dev->rfkill) { + rfkill_unregister(dev->rfkill); + rfkill_destroy(dev->rfkill); + } + if (dev->ops->check_presence) { device_lock(&dev->dev); dev->shutting_down = true; -- cgit v1.2.3-70-g09d2 From 1640f28f6b839637d9b82a3c4a19120601e59c66 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Thu, 11 Apr 2013 13:28:51 +0200 Subject: brcmfmac: add support for dongle ARM CR4 core 
Newer WiFi chip use ARM CR4 core to achieve higher performance. Add necessary code for host driver in order to support CR4 core. Reviewed-by: Arend van Spriel Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c | 15 ++- .../net/wireless/brcm80211/brcmfmac/sdio_chip.c | 150 ++++++++++++++++++--- .../net/wireless/brcm80211/brcmfmac/sdio_chip.h | 6 +- include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_regs.h | 1 + 5 files changed, 148 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index 3147960ad97..d24eb66d324 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -2652,7 +2652,7 @@ static int brcmf_sdio_readshared(struct brcmf_sdio *bus, struct sdpcm_shared_le sh_le; __le32 addr_le; - shaddr = bus->ramsize - 4; + shaddr = bus->ci->rambase + bus->ramsize - 4; /* * Read last word in socram to determine @@ -3030,10 +3030,11 @@ static int brcmf_sdbrcm_get_image(char *buf, int len, struct brcmf_sdio *bus) static int brcmf_sdbrcm_download_code_file(struct brcmf_sdio *bus) { - int offset = 0; + int offset; uint len; u8 *memblock = NULL, *memptr; int ret; + u8 idx; brcmf_dbg(INFO, "Enter\n"); @@ -3054,9 +3055,14 @@ static int brcmf_sdbrcm_download_code_file(struct brcmf_sdio *bus) memptr += (BRCMF_SDALIGN - ((u32)(unsigned long)memblock % BRCMF_SDALIGN)); + offset = bus->ci->rambase; + /* Download image */ - while ((len = - brcmf_sdbrcm_get_image((char *)memptr, MEMBLOCK, bus))) { + len = brcmf_sdbrcm_get_image((char *)memptr, MEMBLOCK, bus); + idx = brcmf_sdio_chip_getinfidx(bus->ci, BCMA_CORE_ARM_CR4); + if (BRCMF_MAX_CORENUM != idx) + memcpy(&bus->ci->rst_vec, memptr, sizeof(bus->ci->rst_vec)); + while (len) { ret = brcmf_sdio_ramrw(bus->sdiodev, true, offset, memptr, len); if (ret) { brcmf_err("error %d on writing %d membytes at 0x%08x\n", @@ -3065,6 +3071,7 @@ static int brcmf_sdbrcm_download_code_file(struct brcmf_sdio *bus) } offset += MEMBLOCK; + len = brcmf_sdbrcm_get_image((char *)memptr, MEMBLOCK, bus); } err: diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.c index 9818598f30e..5db985cb0e0 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.c @@ -52,6 +52,9 @@ #define CIB_REV_MASK 0xff000000 #define CIB_REV_SHIFT 24 +/* ARM CR4 core specific control flag bits */ +#define ARMCR4_BCMA_IOCTL_CPUHALT 0x0020 + #define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { @@ -149,7 +152,7 @@ brcmf_sdio_ai_iscoreup(struct brcmf_sdio_dev *sdiodev, static void brcmf_sdio_sb_coredisable(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid) + struct chip_info *ci, u16 coreid, u32 core_bits) { u32 regdata, base; u8 idx; @@ -235,7 +238,7 @@ brcmf_sdio_sb_coredisable(struct brcmf_sdio_dev *sdiodev, static void brcmf_sdio_ai_coredisable(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid) + struct chip_info *ci, u16 coreid, u32 core_bits) { u8 idx; u32 regdata; @@ -249,19 +252,36 @@ brcmf_sdio_ai_coredisable(struct brcmf_sdio_dev *sdiodev, if ((regdata & BCMA_RESET_CTL_RESET) != 0) return; - brcmf_sdio_regwl(sdiodev, 
ci->c_inf[idx].wrapbase+BCMA_IOCTL, 0, NULL); - regdata = brcmf_sdio_regrl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, + /* ensure no pending backplane operation + * 300uc should be sufficient for backplane ops to be finish + * extra 10ms is taken into account for firmware load stage + * after 10300us carry on disabling the core anyway + */ + SPINWAIT(brcmf_sdio_regrl(sdiodev, + ci->c_inf[idx].wrapbase+BCMA_RESET_ST, + NULL), 10300); + regdata = brcmf_sdio_regrl(sdiodev, + ci->c_inf[idx].wrapbase+BCMA_RESET_ST, NULL); - udelay(10); + if (regdata) + brcmf_err("disabling core 0x%x with reset status %x\n", + coreid, regdata); brcmf_sdio_regwl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_RESET_CTL, BCMA_RESET_CTL_RESET, NULL); udelay(1); + + brcmf_sdio_regwl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, + core_bits, NULL); + regdata = brcmf_sdio_regrl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, + NULL); + usleep_range(10, 20); + } static void brcmf_sdio_sb_resetcore(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid) + struct chip_info *ci, u16 coreid, u32 core_bits) { u32 regdata; u8 idx; @@ -272,7 +292,7 @@ brcmf_sdio_sb_resetcore(struct brcmf_sdio_dev *sdiodev, * Must do the disable sequence first to work for * arbitrary current core state. */ - brcmf_sdio_sb_coredisable(sdiodev, ci, coreid); + brcmf_sdio_sb_coredisable(sdiodev, ci, coreid, 0); /* * Now do the initialization sequence. @@ -325,7 +345,7 @@ brcmf_sdio_sb_resetcore(struct brcmf_sdio_dev *sdiodev, static void brcmf_sdio_ai_resetcore(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid) + struct chip_info *ci, u16 coreid, u32 core_bits) { u8 idx; u32 regdata; @@ -333,28 +353,67 @@ brcmf_sdio_ai_resetcore(struct brcmf_sdio_dev *sdiodev, idx = brcmf_sdio_chip_getinfidx(ci, coreid); /* must disable first to work for arbitrary current core state */ - brcmf_sdio_ai_coredisable(sdiodev, ci, coreid); + brcmf_sdio_ai_coredisable(sdiodev, ci, coreid, core_bits); /* now do initialization sequence */ brcmf_sdio_regwl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, - BCMA_IOCTL_FGC | BCMA_IOCTL_CLK, NULL); + core_bits | BCMA_IOCTL_FGC | BCMA_IOCTL_CLK, NULL); regdata = brcmf_sdio_regrl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, NULL); brcmf_sdio_regwl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_RESET_CTL, 0, NULL); + regdata = brcmf_sdio_regrl(sdiodev, + ci->c_inf[idx].wrapbase+BCMA_RESET_CTL, + NULL); udelay(1); brcmf_sdio_regwl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, - BCMA_IOCTL_CLK, NULL); + core_bits | BCMA_IOCTL_CLK, NULL); regdata = brcmf_sdio_regrl(sdiodev, ci->c_inf[idx].wrapbase+BCMA_IOCTL, NULL); udelay(1); } +#ifdef DEBUG +/* safety check for chipinfo */ +static int brcmf_sdio_chip_cichk(struct chip_info *ci) +{ + u8 core_idx; + + /* check RAM core presence for ARM CM3 core */ + core_idx = brcmf_sdio_chip_getinfidx(ci, BCMA_CORE_ARM_CM3); + if (BRCMF_MAX_CORENUM != core_idx) { + core_idx = brcmf_sdio_chip_getinfidx(ci, + BCMA_CORE_INTERNAL_MEM); + if (BRCMF_MAX_CORENUM == core_idx) { + brcmf_err("RAM core not provided with ARM CM3 core\n"); + return -ENODEV; + } + } + + /* check RAM base for ARM CR4 core */ + core_idx = brcmf_sdio_chip_getinfidx(ci, BCMA_CORE_ARM_CR4); + if (BRCMF_MAX_CORENUM != core_idx) { + if (ci->rambase == 0) { + brcmf_err("RAM base not provided with ARM CR4 core\n"); + return -ENOMEM; + } + } + + return 0; +} +#else /* DEBUG */ +static inline int brcmf_sdio_chip_cichk(struct chip_info *ci) +{ + return 0; +} +#endif + static int brcmf_sdio_chip_recognition(struct brcmf_sdio_dev 
*sdiodev, struct chip_info *ci, u32 regs) { u32 regdata; + int ret; /* Get CC core rev * Chipid is assume to be at offset 0 from regs arg @@ -439,6 +498,10 @@ static int brcmf_sdio_chip_recognition(struct brcmf_sdio_dev *sdiodev, return -ENODEV; } + ret = brcmf_sdio_chip_cichk(ci); + if (ret) + return ret; + switch (ci->socitype) { case SOCI_SB: ci->iscoreup = brcmf_sdio_sb_iscoreup; @@ -538,7 +601,7 @@ brcmf_sdio_chip_buscoresetup(struct brcmf_sdio_dev *sdiodev, * Make sure any on-chip ARM is off (in case strapping is wrong), * or downloaded code was already running. */ - ci->coredisable(sdiodev, ci, BCMA_CORE_ARM_CM3); + ci->coredisable(sdiodev, ci, BCMA_CORE_ARM_CM3, 0); } int brcmf_sdio_chip_attach(struct brcmf_sdio_dev *sdiodev, @@ -701,7 +764,7 @@ static bool brcmf_sdio_chip_writenvram(struct brcmf_sdio_dev *sdiodev, u32 token; __le32 token_le; - nvram_addr = (ci->ramsize - 4) - nvram_sz; + nvram_addr = (ci->ramsize - 4) - nvram_sz + ci->rambase; /* Write the vars list */ err = brcmf_sdio_ramrw(sdiodev, true, nvram_addr, nvram_dat, nvram_sz); @@ -728,7 +791,7 @@ static bool brcmf_sdio_chip_writenvram(struct brcmf_sdio_dev *sdiodev, nvram_addr, nvram_sz, token); /* Write the length token to the last word */ - if (brcmf_sdio_ramrw(sdiodev, true, (ci->ramsize - 4), + if (brcmf_sdio_ramrw(sdiodev, true, (ci->ramsize - 4 + ci->rambase), (u8 *)&token_le, 4)) return false; @@ -741,8 +804,8 @@ brcmf_sdio_chip_cm3_enterdl(struct brcmf_sdio_dev *sdiodev, { u32 zeros = 0; - ci->coredisable(sdiodev, ci, BCMA_CORE_ARM_CM3); - ci->resetcore(sdiodev, ci, BCMA_CORE_INTERNAL_MEM); + ci->coredisable(sdiodev, ci, BCMA_CORE_ARM_CM3, 0); + ci->resetcore(sdiodev, ci, BCMA_CORE_INTERNAL_MEM, 0); /* clear length token */ brcmf_sdio_ramrw(sdiodev, true, ci->ramsize - 4, (u8 *)&zeros, 4); @@ -769,7 +832,41 @@ brcmf_sdio_chip_cm3_exitdl(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci, reg_addr += offsetof(struct sdpcmd_regs, intstatus); brcmf_sdio_regwl(sdiodev, reg_addr, 0xFFFFFFFF, NULL); - ci->resetcore(sdiodev, ci, BCMA_CORE_ARM_CM3); + ci->resetcore(sdiodev, ci, BCMA_CORE_ARM_CM3, 0); + + return true; +} + +static inline void +brcmf_sdio_chip_cr4_enterdl(struct brcmf_sdio_dev *sdiodev, + struct chip_info *ci) +{ + ci->resetcore(sdiodev, ci, BCMA_CORE_ARM_CR4, + ARMCR4_BCMA_IOCTL_CPUHALT); +} + +static bool +brcmf_sdio_chip_cr4_exitdl(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci, + char *nvram_dat, uint nvram_sz) +{ + u8 core_idx; + u32 reg_addr; + + if (!brcmf_sdio_chip_writenvram(sdiodev, ci, nvram_dat, nvram_sz)) + return false; + + /* clear all interrupts */ + core_idx = brcmf_sdio_chip_getinfidx(ci, BCMA_CORE_SDIO_DEV); + reg_addr = ci->c_inf[core_idx].base; + reg_addr += offsetof(struct sdpcmd_regs, intstatus); + brcmf_sdio_regwl(sdiodev, reg_addr, 0xFFFFFFFF, NULL); + + /* Write reset vector to address 0 */ + brcmf_sdio_ramrw(sdiodev, true, 0, (void *)&ci->rst_vec, + sizeof(ci->rst_vec)); + + /* restore ARM */ + ci->resetcore(sdiodev, ci, BCMA_CORE_ARM_CR4, 0); return true; } @@ -777,12 +874,27 @@ brcmf_sdio_chip_cm3_exitdl(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci, void brcmf_sdio_chip_enter_download(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci) { - brcmf_sdio_chip_cm3_enterdl(sdiodev, ci); + u8 arm_core_idx; + + arm_core_idx = brcmf_sdio_chip_getinfidx(ci, BCMA_CORE_ARM_CM3); + if (BRCMF_MAX_CORENUM != arm_core_idx) { + brcmf_sdio_chip_cm3_enterdl(sdiodev, ci); + return; + } + + brcmf_sdio_chip_cr4_enterdl(sdiodev, ci); } bool brcmf_sdio_chip_exit_download(struct 
brcmf_sdio_dev *sdiodev, struct chip_info *ci, char *nvram_dat, uint nvram_sz) { - return brcmf_sdio_chip_cm3_exitdl(sdiodev, ci, nvram_dat, nvram_sz); + u8 arm_core_idx; + + arm_core_idx = brcmf_sdio_chip_getinfidx(ci, BCMA_CORE_ARM_CM3); + if (BRCMF_MAX_CORENUM != arm_core_idx) + return brcmf_sdio_chip_cm3_exitdl(sdiodev, ci, nvram_dat, + nvram_sz); + + return brcmf_sdio_chip_cr4_exitdl(sdiodev, ci, nvram_dat, nvram_sz); } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.h b/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.h index 2123ea71d12..83c041f1bf4 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.h +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio_chip.h @@ -73,15 +73,17 @@ struct chip_info { u32 pmurev; u32 pmucaps; u32 ramsize; + u32 rambase; + u32 rst_vec; /* reset vertor for ARM CR4 core */ bool (*iscoreup)(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci, u16 coreid); u32 (*corerev)(struct brcmf_sdio_dev *sdiodev, struct chip_info *ci, u16 coreid); void (*coredisable)(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid); + struct chip_info *ci, u16 coreid, u32 core_bits); void (*resetcore)(struct brcmf_sdio_dev *sdiodev, - struct chip_info *ci, u16 coreid); + struct chip_info *ci, u16 coreid, u32 core_bits); }; struct sbconfig { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0ab6712fd76..f14a98a79c9 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -134,6 +134,7 @@ struct bcma_host_ops { #define BCMA_CORE_I2S 0x834 #define BCMA_CORE_SDR_DDR1_MEM_CTL 0x835 /* SDR/DDR1 memory controller core */ #define BCMA_CORE_SHIM 0x837 /* SHIM component in ubus/6362 */ +#define BCMA_CORE_ARM_CR4 0x83e #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index 7e8104bb7a7..917dcd7965e 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -37,6 +37,7 @@ #define BCMA_IOST_BIST_DONE 0x8000 #define BCMA_RESET_CTL 0x0800 #define BCMA_RESET_CTL_RESET 0x0001 +#define BCMA_RESET_ST 0x0804 /* BCMA PCI config space registers. */ #define BCMA_PCI_PMCSR 0x44 -- cgit v1.2.3-70-g09d2 From 1e9ab4dd258ecbb0f1c377fd4dbe227cdb93d9bd Mon Sep 17 00:00:00 2001 From: Piotr Haber Date: Thu, 11 Apr 2013 13:28:52 +0200 Subject: brcmfmac: setup SDIO reset behavior Set device in a manner that SDIO I/O card reset will lead to WLAN backplane and PMU state reset. Reviewed-by: Hante Meuleman Reviewed-by: Arend van Spriel Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky (Zhenhui) Lin Signed-off-by: Piotr Haber Signed-off-by: Arend van Spriel Signed-off-by: John W. 
Linville --- drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c | 38 ++++++++++++++++++---- .../net/wireless/brcm80211/brcmfmac/sdio_host.h | 2 ++ include/linux/bcma/bcma_driver_chipcommon.h | 3 ++ 3 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index d24eb66d324..c06bb083c45 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -3635,7 +3635,6 @@ brcmf_sdbrcm_probe_attach(struct brcmf_sdio *bus, u32 regsva) int err = 0; int reg_addr; u32 reg_val; - u8 idx; bus->alp_only = true; @@ -3686,12 +3685,37 @@ brcmf_sdbrcm_probe_attach(struct brcmf_sdio *bus, u32 regsva) goto fail; } - /* Set core control so an SDIO reset does a backplane reset */ - idx = brcmf_sdio_chip_getinfidx(bus->ci, BCMA_CORE_SDIO_DEV); - reg_addr = bus->ci->c_inf[idx].base + - offsetof(struct sdpcmd_regs, corecontrol); - reg_val = brcmf_sdio_regrl(bus->sdiodev, reg_addr, NULL); - brcmf_sdio_regwl(bus->sdiodev, reg_addr, reg_val | CC_BPRESEN, NULL); + /* Set card control so an SDIO card reset does a WLAN backplane reset */ + reg_val = brcmf_sdio_regrb(bus->sdiodev, + SDIO_CCCR_BRCM_CARDCTRL, &err); + if (err) + goto fail; + + reg_val |= SDIO_CCCR_BRCM_CARDCTRL_WLANRESET; + + brcmf_sdio_regwb(bus->sdiodev, + SDIO_CCCR_BRCM_CARDCTRL, reg_val, &err); + if (err) + goto fail; + + /* set PMUControl so a backplane reset does PMU state reload */ + reg_addr = CORE_CC_REG(bus->ci->c_inf[0].base, + pmucontrol); + reg_val = brcmf_sdio_regrl(bus->sdiodev, + reg_addr, + &err); + if (err) + goto fail; + + reg_val |= (BCMA_CC_PMU_CTL_RES_RELOAD << BCMA_CC_PMU_CTL_RES_SHIFT); + + brcmf_sdio_regwl(bus->sdiodev, + reg_addr, + reg_val, + &err); + if (err) + goto fail; + sdio_release_host(bus->sdiodev->func[1]); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h b/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h index b9b397b5996..28ed3cc9f35 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h @@ -52,6 +52,8 @@ #define SDIO_CCCR_BRCM_CARDCAP_CMD14_SUPPORT 0x02 #define SDIO_CCCR_BRCM_CARDCAP_CMD14_EXT 0x04 #define SDIO_CCCR_BRCM_CARDCAP_CMD_NODEC 0x08 +#define SDIO_CCCR_BRCM_CARDCTRL 0xf1 +#define SDIO_CCCR_BRCM_CARDCTRL_WLANRESET 0x02 #define SDIO_CCCR_BRCM_SEPINT 0xf2 #define SDIO_SEPINT_MASK 0x01 diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 453fcc91468..b8b09eac60a 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -316,6 +316,9 @@ #define BCMA_CC_PMU_CTL 0x0600 /* PMU control */ #define BCMA_CC_PMU_CTL_ILP_DIV 0xFFFF0000 /* ILP div mask */ #define BCMA_CC_PMU_CTL_ILP_DIV_SHIFT 16 +#define BCMA_CC_PMU_CTL_RES 0x00006000 /* reset control mask */ +#define BCMA_CC_PMU_CTL_RES_SHIFT 13 +#define BCMA_CC_PMU_CTL_RES_RELOAD 0x2 /* reload POR values */ #define BCMA_CC_PMU_CTL_PLL_UPD 0x00000400 #define BCMA_CC_PMU_CTL_NOILPONW 0x00000200 /* No ILP on wait */ #define BCMA_CC_PMU_CTL_HTREQEN 0x00000100 /* HT req enable */ -- cgit v1.2.3-70-g09d2 From 668761ac01d6f5a36b8e5a24d4e154550e2c4c3b Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Fri, 12 Apr 2013 10:55:55 +0200 Subject: brcmfmac: define and use platform specific data for SDIO. This patch adds support for platform specific data for SDIO fullmac devices. 
Currently OOB interrupts are configured by Kconfig BRCMFMAC_SDIO_OOB but that is now determined dynamically by checking availibility of platform data. Cc: Hauke Mehrtens Reviewed-by: Arend Van Spriel Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Piotr Haber Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/Kconfig | 9 -- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 155 +++++++++++---------- .../net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c | 114 +++++---------- drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c | 29 ++-- .../net/wireless/brcm80211/brcmfmac/sdio_host.h | 6 +- include/linux/platform_data/brcmfmac-sdio.h | 124 +++++++++++++++++ 6 files changed, 250 insertions(+), 187 deletions(-) create mode 100644 include/linux/platform_data/brcmfmac-sdio.h (limited to 'include') diff --git a/drivers/net/wireless/brcm80211/Kconfig b/drivers/net/wireless/brcm80211/Kconfig index 747e9317dab..fc8a0fa6d3b 100644 --- a/drivers/net/wireless/brcm80211/Kconfig +++ b/drivers/net/wireless/brcm80211/Kconfig @@ -37,15 +37,6 @@ config BRCMFMAC_SDIO IEEE802.11n embedded FullMAC WLAN driver. Say Y if you want to use the driver for a SDIO wireless card. -config BRCMFMAC_SDIO_OOB - bool "Out of band interrupt support for SDIO interface chipset" - depends on BRCMFMAC_SDIO - ---help--- - This option enables out-of-band interrupt support for Broadcom - SDIO Wifi chipset using fullmac in order to gain better - performance and deep sleep wake up capability on certain - platforms. Say N if you are unsure. - config BRCMFMAC_USB bool "USB bus interface support for FullMAC driver" depends on USB diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index aa51f370be4..4891e3df205 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -37,16 +38,15 @@ #define SDIOH_API_ACCESS_RETRY_LIMIT 2 -#ifdef CONFIG_BRCMFMAC_SDIO_OOB -static irqreturn_t brcmf_sdio_irqhandler(int irq, void *dev_id) + +static irqreturn_t brcmf_sdio_oob_irqhandler(int irq, void *dev_id) { struct brcmf_bus *bus_if = dev_get_drvdata(dev_id); struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; - brcmf_dbg(INTR, "oob intr triggered\n"); + brcmf_dbg(INTR, "OOB intr triggered\n"); - /* - * out-of-band interrupt is level-triggered which won't + /* out-of-band interrupt is level-triggered which won't * be cleared until dpc */ if (sdiodev->irq_en) { @@ -59,72 +59,12 @@ static irqreturn_t brcmf_sdio_irqhandler(int irq, void *dev_id) return IRQ_HANDLED; } -int brcmf_sdio_intr_register(struct brcmf_sdio_dev *sdiodev) -{ - int ret = 0; - u8 data; - unsigned long flags; - - brcmf_dbg(SDIO, "Entering: irq %d\n", sdiodev->irq); - - ret = request_irq(sdiodev->irq, brcmf_sdio_irqhandler, - sdiodev->irq_flags, "brcmf_oob_intr", - &sdiodev->func[1]->dev); - if (ret != 0) - return ret; - spin_lock_init(&sdiodev->irq_en_lock); - spin_lock_irqsave(&sdiodev->irq_en_lock, flags); - sdiodev->irq_en = true; - spin_unlock_irqrestore(&sdiodev->irq_en_lock, flags); - - ret = enable_irq_wake(sdiodev->irq); - if (ret != 0) - return ret; - sdiodev->irq_wake = true; - - sdio_claim_host(sdiodev->func[1]); - - /* must configure SDIO_CCCR_IENx to enable irq */ - data = brcmf_sdio_regrb(sdiodev, SDIO_CCCR_IENx, &ret); - data |= 1 << SDIO_FUNC_1 | 1 << SDIO_FUNC_2 | 1; - 
brcmf_sdio_regwb(sdiodev, SDIO_CCCR_IENx, data, &ret); - - /* redirect, configure and enable io for interrupt signal */ - data = SDIO_SEPINT_MASK | SDIO_SEPINT_OE; - if (sdiodev->irq_flags & IRQF_TRIGGER_HIGH) - data |= SDIO_SEPINT_ACT_HI; - brcmf_sdio_regwb(sdiodev, SDIO_CCCR_BRCM_SEPINT, data, &ret); - - sdio_release_host(sdiodev->func[1]); - - return 0; -} - -int brcmf_sdio_intr_unregister(struct brcmf_sdio_dev *sdiodev) -{ - brcmf_dbg(SDIO, "Entering\n"); - - sdio_claim_host(sdiodev->func[1]); - brcmf_sdio_regwb(sdiodev, SDIO_CCCR_BRCM_SEPINT, 0, NULL); - brcmf_sdio_regwb(sdiodev, SDIO_CCCR_IENx, 0, NULL); - sdio_release_host(sdiodev->func[1]); - - if (sdiodev->irq_wake) { - disable_irq_wake(sdiodev->irq); - sdiodev->irq_wake = false; - } - free_irq(sdiodev->irq, &sdiodev->func[1]->dev); - sdiodev->irq_en = false; - - return 0; -} -#else /* CONFIG_BRCMFMAC_SDIO_OOB */ -static void brcmf_sdio_irqhandler(struct sdio_func *func) +static void brcmf_sdio_ib_irqhandler(struct sdio_func *func) { struct brcmf_bus *bus_if = dev_get_drvdata(&func->dev); struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; - brcmf_dbg(INTR, "ib intr triggered\n"); + brcmf_dbg(INTR, "IB intr triggered\n"); brcmf_sdbrcm_isr(sdiodev->bus); } @@ -136,12 +76,56 @@ static void brcmf_sdio_dummy_irqhandler(struct sdio_func *func) int brcmf_sdio_intr_register(struct brcmf_sdio_dev *sdiodev) { - brcmf_dbg(SDIO, "Entering\n"); + int ret = 0; + u8 data; + unsigned long flags; - sdio_claim_host(sdiodev->func[1]); - sdio_claim_irq(sdiodev->func[1], brcmf_sdio_irqhandler); - sdio_claim_irq(sdiodev->func[2], brcmf_sdio_dummy_irqhandler); - sdio_release_host(sdiodev->func[1]); + if ((sdiodev->pdata) && (sdiodev->pdata->oob_irq_supported)) { + brcmf_dbg(SDIO, "Enter, register OOB IRQ %d\n", + sdiodev->pdata->oob_irq_nr); + ret = request_irq(sdiodev->pdata->oob_irq_nr, + brcmf_sdio_oob_irqhandler, + sdiodev->pdata->oob_irq_flags, + "brcmf_oob_intr", + &sdiodev->func[1]->dev); + if (ret != 0) { + brcmf_err("request_irq failed %d\n", ret); + return ret; + } + sdiodev->oob_irq_requested = true; + spin_lock_init(&sdiodev->irq_en_lock); + spin_lock_irqsave(&sdiodev->irq_en_lock, flags); + sdiodev->irq_en = true; + spin_unlock_irqrestore(&sdiodev->irq_en_lock, flags); + + ret = enable_irq_wake(sdiodev->pdata->oob_irq_nr); + if (ret != 0) { + brcmf_err("enable_irq_wake failed %d\n", ret); + return ret; + } + sdiodev->irq_wake = true; + + sdio_claim_host(sdiodev->func[1]); + + /* must configure SDIO_CCCR_IENx to enable irq */ + data = brcmf_sdio_regrb(sdiodev, SDIO_CCCR_IENx, &ret); + data |= 1 << SDIO_FUNC_1 | 1 << SDIO_FUNC_2 | 1; + brcmf_sdio_regwb(sdiodev, SDIO_CCCR_IENx, data, &ret); + + /* redirect, configure and enable io for interrupt signal */ + data = SDIO_SEPINT_MASK | SDIO_SEPINT_OE; + if (sdiodev->pdata->oob_irq_flags & IRQF_TRIGGER_HIGH) + data |= SDIO_SEPINT_ACT_HI; + brcmf_sdio_regwb(sdiodev, SDIO_CCCR_BRCM_SEPINT, data, &ret); + + sdio_release_host(sdiodev->func[1]); + } else { + brcmf_dbg(SDIO, "Entering\n"); + sdio_claim_host(sdiodev->func[1]); + sdio_claim_irq(sdiodev->func[1], brcmf_sdio_ib_irqhandler); + sdio_claim_irq(sdiodev->func[2], brcmf_sdio_dummy_irqhandler); + sdio_release_host(sdiodev->func[1]); + } return 0; } @@ -150,14 +134,31 @@ int brcmf_sdio_intr_unregister(struct brcmf_sdio_dev *sdiodev) { brcmf_dbg(SDIO, "Entering\n"); - sdio_claim_host(sdiodev->func[1]); - sdio_release_irq(sdiodev->func[2]); - sdio_release_irq(sdiodev->func[1]); - sdio_release_host(sdiodev->func[1]); + if ((sdiodev->pdata) && 
(sdiodev->pdata->oob_irq_supported)) { + sdio_claim_host(sdiodev->func[1]); + brcmf_sdio_regwb(sdiodev, SDIO_CCCR_BRCM_SEPINT, 0, NULL); + brcmf_sdio_regwb(sdiodev, SDIO_CCCR_IENx, 0, NULL); + sdio_release_host(sdiodev->func[1]); + + if (sdiodev->oob_irq_requested) { + sdiodev->oob_irq_requested = false; + if (sdiodev->irq_wake) { + disable_irq_wake(sdiodev->pdata->oob_irq_nr); + sdiodev->irq_wake = false; + } + free_irq(sdiodev->pdata->oob_irq_nr, + &sdiodev->func[1]->dev); + sdiodev->irq_en = false; + } + } else { + sdio_claim_host(sdiodev->func[1]); + sdio_release_irq(sdiodev->func[2]); + sdio_release_irq(sdiodev->func[1]); + sdio_release_host(sdiodev->func[1]); + } return 0; } -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ int brcmf_sdcard_set_sbaddr_window(struct brcmf_sdio_dev *sdiodev, u32 address) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c index b1ea1036e82..44fa0cdbf97 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c @@ -26,6 +26,7 @@ #include /* request_irq() */ #include #include +#include #include #include @@ -62,14 +63,8 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { }; MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids); -#ifdef CONFIG_BRCMFMAC_SDIO_OOB -static struct list_head oobirq_lh; -struct brcmf_sdio_oobirq { - unsigned int irq; - unsigned long flags; - struct list_head list; -}; -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ +static struct brcmfmac_sdio_platform_data *brcmfmac_sdio_pdata; + static bool brcmf_pm_resume_error(struct brcmf_sdio_dev *sdiodev) @@ -428,33 +423,6 @@ void brcmf_sdioh_detach(struct brcmf_sdio_dev *sdiodev) } -#ifdef CONFIG_BRCMFMAC_SDIO_OOB -static int brcmf_sdio_getintrcfg(struct brcmf_sdio_dev *sdiodev) -{ - struct brcmf_sdio_oobirq *oobirq_entry; - - if (list_empty(&oobirq_lh)) { - brcmf_err("no valid oob irq resource\n"); - return -ENXIO; - } - - oobirq_entry = list_first_entry(&oobirq_lh, struct brcmf_sdio_oobirq, - list); - - sdiodev->irq = oobirq_entry->irq; - sdiodev->irq_flags = oobirq_entry->flags; - list_del(&oobirq_entry->list); - kfree(oobirq_entry); - - return 0; -} -#else -static inline int brcmf_sdio_getintrcfg(struct brcmf_sdio_dev *sdiodev) -{ - return 0; -} -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ - static int brcmf_ops_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id) { @@ -495,15 +463,13 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, dev_set_drvdata(&func->dev, bus_if); dev_set_drvdata(&sdiodev->func[1]->dev, bus_if); sdiodev->dev = &sdiodev->func[1]->dev; + sdiodev->pdata = brcmfmac_sdio_pdata; atomic_set(&sdiodev->suspend, false); init_waitqueue_head(&sdiodev->request_byte_wait); init_waitqueue_head(&sdiodev->request_word_wait); init_waitqueue_head(&sdiodev->request_chain_wait); init_waitqueue_head(&sdiodev->request_buffer_wait); - err = brcmf_sdio_getintrcfg(sdiodev); - if (err) - goto fail; brcmf_dbg(SDIO, "F2 found, calling brcmf_sdio_probe...\n"); err = brcmf_sdio_probe(sdiodev); @@ -598,7 +564,7 @@ static const struct dev_pm_ops brcmf_sdio_pm_ops = { static struct sdio_driver brcmf_sdmmc_driver = { .probe = brcmf_ops_sdio_probe, .remove = brcmf_ops_sdio_remove, - .name = "brcmfmac", + .name = BRCMFMAC_SDIO_PDATA_NAME, .id_table = brcmf_sdmmc_ids, #ifdef CONFIG_PM_SLEEP .drv = { @@ -607,72 +573,51 @@ static struct sdio_driver brcmf_sdmmc_driver = { #endif /* CONFIG_PM_SLEEP */ }; -#ifdef CONFIG_BRCMFMAC_SDIO_OOB static int 
brcmf_sdio_pd_probe(struct platform_device *pdev) { - struct resource *res; - struct brcmf_sdio_oobirq *oobirq_entry; - int i, ret; + int ret; - INIT_LIST_HEAD(&oobirq_lh); + brcmf_dbg(SDIO, "Enter\n"); - for (i = 0; ; i++) { - res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!res) - break; + brcmfmac_sdio_pdata = pdev->dev.platform_data; - oobirq_entry = kzalloc(sizeof(struct brcmf_sdio_oobirq), - GFP_KERNEL); - if (!oobirq_entry) - return -ENOMEM; - oobirq_entry->irq = res->start; - oobirq_entry->flags = res->flags & IRQF_TRIGGER_MASK; - list_add_tail(&oobirq_entry->list, &oobirq_lh); - } - if (i == 0) - return -ENXIO; + if (brcmfmac_sdio_pdata->power_on) + brcmfmac_sdio_pdata->power_on(); ret = sdio_register_driver(&brcmf_sdmmc_driver); - if (ret) brcmf_err("sdio_register_driver failed: %d\n", ret); return ret; } -static struct platform_driver brcmf_sdio_pd = { - .probe = brcmf_sdio_pd_probe, - .driver = { - .name = "brcmf_sdio_pd" - } -}; - -void brcmf_sdio_exit(void) +static int brcmf_sdio_pd_remove(struct platform_device *pdev) { brcmf_dbg(SDIO, "Enter\n"); + if (brcmfmac_sdio_pdata->power_off) + brcmfmac_sdio_pdata->power_off(); + sdio_unregister_driver(&brcmf_sdmmc_driver); - platform_driver_unregister(&brcmf_sdio_pd); + return 0; } -void brcmf_sdio_init(void) -{ - int ret; - - brcmf_dbg(SDIO, "Enter\n"); - - ret = platform_driver_register(&brcmf_sdio_pd); +static struct platform_driver brcmf_sdio_pd = { + .remove = brcmf_sdio_pd_remove, + .driver = { + .name = BRCMFMAC_SDIO_PDATA_NAME + } +}; - if (ret) - brcmf_err("platform_driver_register failed: %d\n", ret); -} -#else void brcmf_sdio_exit(void) { brcmf_dbg(SDIO, "Enter\n"); - sdio_unregister_driver(&brcmf_sdmmc_driver); + if (brcmfmac_sdio_pdata) + platform_driver_unregister(&brcmf_sdio_pd); + else + sdio_unregister_driver(&brcmf_sdmmc_driver); } void brcmf_sdio_init(void) @@ -681,9 +626,12 @@ void brcmf_sdio_init(void) brcmf_dbg(SDIO, "Enter\n"); - ret = sdio_register_driver(&brcmf_sdmmc_driver); + ret = platform_driver_probe(&brcmf_sdio_pd, brcmf_sdio_pd_probe); + if (ret == -ENODEV) { + brcmf_dbg(SDIO, "No platform data available, registering without.\n"); + ret = sdio_register_driver(&brcmf_sdmmc_driver); + } if (ret) - brcmf_err("sdio_register_driver failed: %d\n", ret); + brcmf_err("driver registration failed: %d\n", ret); } -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index fd697ce3683..d2487518bd2 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -517,7 +518,7 @@ static int qcount[NUMPRIO]; static int tx_packets[NUMPRIO]; #endif /* DEBUG */ -#define SDIO_DRIVE_STRENGTH 6 /* in milliamps */ +#define DEFAULT_SDIO_DRIVE_STRENGTH 6 /* in milliamps */ #define RETRYCHAN(chan) ((chan) == SDPCM_EVENT_CHANNEL) @@ -2046,23 +2047,19 @@ static void brcmf_sdbrcm_bus_stop(struct device *dev) bus->tx_seq = bus->rx_seq = 0; } -#ifdef CONFIG_BRCMFMAC_SDIO_OOB static inline void brcmf_sdbrcm_clrintr(struct brcmf_sdio *bus) { unsigned long flags; - spin_lock_irqsave(&bus->sdiodev->irq_en_lock, flags); - if (!bus->sdiodev->irq_en && !atomic_read(&bus->ipend)) { - enable_irq(bus->sdiodev->irq); - bus->sdiodev->irq_en = true; + if (bus->sdiodev->oob_irq_requested) { + spin_lock_irqsave(&bus->sdiodev->irq_en_lock, flags); + if (!bus->sdiodev->irq_en && 
!atomic_read(&bus->ipend)) { + enable_irq(bus->sdiodev->pdata->oob_irq_nr); + bus->sdiodev->irq_en = true; + } + spin_unlock_irqrestore(&bus->sdiodev->irq_en_lock, flags); } - spin_unlock_irqrestore(&bus->sdiodev->irq_en_lock, flags); -} -#else -static inline void brcmf_sdbrcm_clrintr(struct brcmf_sdio *bus) -{ } -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ static inline void brcmf_sdbrcm_adddpctsk(struct brcmf_sdio *bus) { @@ -3639,6 +3636,7 @@ brcmf_sdbrcm_probe_attach(struct brcmf_sdio *bus, u32 regsva) int err = 0; int reg_addr; u32 reg_val; + u32 drivestrength; bus->alp_only = true; @@ -3679,8 +3677,11 @@ brcmf_sdbrcm_probe_attach(struct brcmf_sdio *bus, u32 regsva) goto fail; } - brcmf_sdio_chip_drivestrengthinit(bus->sdiodev, bus->ci, - SDIO_DRIVE_STRENGTH); + if ((bus->sdiodev->pdata) && (bus->sdiodev->pdata->drive_strength)) + drivestrength = bus->sdiodev->pdata->drive_strength; + else + drivestrength = DEFAULT_SDIO_DRIVE_STRENGTH; + brcmf_sdio_chip_drivestrengthinit(bus->sdiodev, bus->ci, drivestrength); /* Get info on the SOCRAM cores... */ bus->ramsize = bus->ci->ramsize; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h b/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h index 28ed3cc9f35..7c1b6332747 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio_host.h @@ -174,13 +174,11 @@ struct brcmf_sdio_dev { wait_queue_head_t request_buffer_wait; struct device *dev; struct brcmf_bus *bus_if; -#ifdef CONFIG_BRCMFMAC_SDIO_OOB - unsigned int irq; /* oob interrupt number */ - unsigned long irq_flags; /* board specific oob flags */ + struct brcmfmac_sdio_platform_data *pdata; + bool oob_irq_requested; bool irq_en; /* irq enable flags */ spinlock_t irq_en_lock; bool irq_wake; /* irq wake enable flags */ -#endif /* CONFIG_BRCMFMAC_SDIO_OOB */ }; /* Register/deregister interrupt handler. */ diff --git a/include/linux/platform_data/brcmfmac-sdio.h b/include/linux/platform_data/brcmfmac-sdio.h new file mode 100644 index 00000000000..1ade657d5fc --- /dev/null +++ b/include/linux/platform_data/brcmfmac-sdio.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2013 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _LINUX_BRCMFMAC_PLATFORM_H +#define _LINUX_BRCMFMAC_PLATFORM_H + +/* + * Platform specific driver functions and data. Through the platform specific + * device data functions can be provided to help the brcmfmac driver to + * operate with the device in combination with the used platform. 
+ * + * Use the platform data in the following (similar) way: + * + * +#include + + +static void brcmfmac_power_on(void) +{ +} + +static void brcmfmac_power_off(void) +{ +} + +static void brcmfmac_reset(void) +{ +} + +static struct brcmfmac_sdio_platform_data brcmfmac_sdio_pdata = { + .power_on = brcmfmac_power_on, + .power_off = brcmfmac_power_off, + .reset = brcmfmac_reset +}; + +static struct platform_device brcmfmac_device = { + .name = BRCMFMAC_SDIO_PDATA_NAME, + .id = PLATFORM_DEVID_NONE, + .dev.platform_data = &brcmfmac_sdio_pdata +}; + +void __init brcmfmac_init_pdata(void) +{ + brcmfmac_sdio_pdata.oob_irq_supported = true; + brcmfmac_sdio_pdata.oob_irq_nr = gpio_to_irq(GPIO_BRCMF_SDIO_OOB); + brcmfmac_sdio_pdata.oob_irq_flags = IORESOURCE_IRQ | + IORESOURCE_IRQ_HIGHLEVEL; + platform_device_register(&brcmfmac_device); +} + * + * + * Note: the brcmfmac can be loaded as module or be statically built-in into + * the kernel. If built-in then do note that it uses module_init (and + * module_exit) routines which equal device_initcall. So if you intend to + * create a module with the platform specific data for the brcmfmac and have + * it built-in to the kernel then use a higher initcall then device_initcall + * (see init.h). If this is not done then brcmfmac will load without problems + * but will not pickup the platform data. + * + * When the driver does not "detect" platform driver data then it will continue + * without reporting anything and just assume there is no data needed. Which is + * probably true for most platforms. + * + * Explanation of the platform_data fields: + * + * drive_strength: is the preferred drive_strength to be used for the SDIO + * pins. If 0 then a default value will be used. This is the target drive + * strength, the exact drive strength which will be used depends on the + * capabilities of the device. + * + * oob_irq_supported: does the board have support for OOB interrupts. SDIO + * in-band interrupts are relatively slow and for having less overhead on + * interrupt processing an out of band interrupt can be used. If the HW + * supports this then enable this by setting this field to true and configure + * the oob related fields. + * + * oob_irq_nr, oob_irq_flags: the OOB interrupt information. The values are + * used for registering the irq using request_irq function. + * + * power_on: This function is called by the brcmfmac when the module gets + * loaded. This can be particularly useful for low power devices. The platform + * spcific routine may for example decide to power up the complete device. + * If there is no use-case for this function then provide NULL. + * + * power_off: This function is called by the brcmfmac when the module gets + * unloaded. At this point the device can be powered down or otherwise be reset. + * So if an actual power_off is not supported but reset is then reset the device + * when this function gets called. This can be particularly useful for low power + * devices. If there is no use-case for this function (either power-down or + * reset) then provide NULL. + * + * reset: This function can get called if the device communication broke down. + * This functionality is particularly useful in case of SDIO type devices. It is + * possible to reset a dongle via sdio data interface, but it requires that + * this is fully functional. This function is chip/module specific and this + * function should return only after the complete reset has completed. 
+ */ + +#define BRCMFMAC_SDIO_PDATA_NAME "brcmfmac_sdio" + +struct brcmfmac_sdio_platform_data { + unsigned int drive_strength; + bool oob_irq_supported; + unsigned int oob_irq_nr; + unsigned long oob_irq_flags; + void (*power_on)(void); + void (*power_off)(void); + void (*reset)(void); +}; + +#endif /* _LINUX_BRCMFMAC_PLATFORM_H */ -- cgit v1.2.3-70-g09d2 From d6a4a10411764cf1c3a5dad4f06c5ebe5194488b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Apr 2013 11:31:52 +0000 Subject: tcp: GSO should be TSQ friendly I noticed that TSQ (TCP Small queues) was less effective when TSO is turned off, and GSO is on. If BQL is not enabled, TSQ has then no effect. It turns out the GSO engine frees the original gso_skb at the time the fragments are generated and queued to the NIC. We should instead call the tcp_wfree() destructor for the last fragment, to keep the flow control as intended in TSQ. This effectively limits the number of queued packets on qdisc + NIC layers. Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Yuchung Cheng Cc: Nandita Dukkipati Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 12 ++++++++++++ net/ipv4/tcp_output.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4475aaf0af5..5bba80fbd1d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,6 +370,7 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern void tcp_release_cb(struct sock *sk); +extern void tcp_wfree(struct sk_buff *skb); extern void tcp_write_timer_handler(struct sock *sk); extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a96f7b58627..963bda18486 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2885,6 +2885,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, __be32 delta; unsigned int oldlen; unsigned int mss; + struct sk_buff *gso_skb = skb; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2953,6 +2954,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th->cwr = 0; } while (skb->next); + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (gso_skb->destructor == tcp_wfree) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af354c98fdb..d1269435354 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -787,7 +787,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3-70-g09d2 From bf84a01063eaab2f1a37d72d1b903445b3a25a4e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 14 Apr 2013 08:08:13 +0000 Subject: net: sock: make sock_tx_timestamp void Currently, sock_tx_timestamp() always returns 0. 
The comment that describes the sock_tx_timestamp() function wrongly says that it returns an error when an invalid argument is passed (from commit 20d4947353be, ``net: socket infrastructure for SO_TIMESTAMPING''). Make the function void, so that we can also remove all the unneeded if conditions that check for such a _non-existant_ error case in the output path. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sock.h | 5 ++--- net/can/raw.c | 5 ++--- net/ipv4/ping.c | 5 ++--- net/ipv4/udp.c | 6 +++--- net/ipv6/ip6_output.c | 7 ++----- net/packet/af_packet.c | 10 ++++------ net/socket.c | 3 +-- 7 files changed, 16 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 08f05f96473..5c97b0fc562 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2159,10 +2159,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * @sk: socket sending this packet * @tx_flags: filled with instructions for time stamping * - * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if - * parameters are invalid. + * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index c1764e41dda..1085e65f848 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2e91006d607..7d93d62cd5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7117d1467b0..2722db024a0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 155eccfa776..d2eedf19233 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8e4644ff8d3..77d71f84758 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1505,9 +1505,8 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2312,9 +2311,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, diff --git a/net/socket.c b/net/socket.c index 88f759adf3a..36883fea44f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,7 +600,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3-70-g09d2 From ff2266cddd69f5e0c9d5121ed9218d2f694406cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 15 Apr 2013 03:27:17 +0000 Subject: net: sctp: remove sctp_ep_common struct member 'malloced' There is actually no need to keep this member in the structure, because after init it's always 1 anyway, thus always kfree called. This seems to be an ancient leftover from the very initial implementation from 2.5 times. Only in case the initialization of an association fails, we leave base.malloced as 0, but we nevertheless kfree it in the error path in sctp_association_new(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/associola.c | 8 ++------ net/sctp/endpointola.c | 10 +++------- 3 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0e0f9d2322e..3e80eedab17 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1174,11 +1174,9 @@ struct sctp_ep_common { /* Some fields to help us manage this object. * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. - * malloced - Do we need to kfree this object? */ atomic_t refcnt; char dead; - char malloced; /* What socket does this endpoint belong to? */ struct sock *sk; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d2709e2b7be..b893aa6862f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -105,7 +105,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); asoc->base.dead = 0; - asoc->base.malloced = 0; /* Initialize the bind addr area. 
*/ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, if (!sctp_association_init(asoc, ep, sk, scope, gfp)) goto fail_init; - asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - if (asoc->base.malloced) { - kfree(asoc); - SCTP_DBG_OBJCNT_DEC(assoc); - } + kfree(asoc); + SCTP_DBG_OBJCNT_DEC(assoc); } /* Change the primary destination address for the peer. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 12ed45dbe75..46bbfc266ef 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -122,7 +122,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); ep->base.dead = 0; - ep->base.malloced = 1; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; - ep->base.malloced = 1; + SCTP_DBG_OBJCNT_INC(ep); return ep; @@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (ep->base.sk) sock_put(ep->base.sk); - /* Finally, free up our memory. */ - if (ep->base.malloced) { - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); - } + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); } /* Hold a reference to an endpoint. */ -- cgit v1.2.3-70-g09d2 From 0022d2dd4d76e0e7d5c241c343a5016fdfa2ad4f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 15 Apr 2013 03:27:18 +0000 Subject: net: sctp: minor: make sctp_ep_common's member 'dead' a bool Since dead only holds two states (0,1), make it a bool instead of a 'char', which is more appropriate for its purpose. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/associola.c | 4 ++-- net/sctp/endpointola.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3e80eedab17..e12aa77abc5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1176,7 +1176,7 @@ struct sctp_ep_common { * dead - Do not attempt to use this object. */ atomic_t refcnt; - char dead; + bool dead; /* What socket does this endpoint belong to? */ struct sock *sk; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b893aa6862f..423549a714e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -104,7 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = 0; + asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -407,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Mark as dead, so other users can know this structure is * going away. */ - asoc->base.dead = 1; + asoc->base.dead = true; /* Dispose of any data lying around in the outqueue. 
*/ sctp_outq_free(&asoc->outqueue); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 46bbfc266ef..5fbd7bc6bb1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,7 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); - ep->base.dead = 0; + ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -233,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, */ void sctp_endpoint_free(struct sctp_endpoint *ep) { - ep->base.dead = 1; + ep->base.dead = true; ep->base.sk->sk_state = SCTP_SS_CLOSED; -- cgit v1.2.3-70-g09d2 From 4cd729b04285b7330edaf5a7080aa795d6d15ff3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 15 Apr 2013 09:54:25 +0000 Subject: net: add dev_uc_sync_multiple() and dev_mc_sync_multiple() api The current implementation of dev_uc_sync/unsync() assumes that there is a strict 1-to-1 relationship between the source and destination of the sync. In other words, once an address has been synced to a destination device, it will not be synced to any other device through the sync API. However, there are some virtual devices that aggregate a number of lower devices and need to sync addresses to all of them. The current API falls short there. This patch introduces a new dev_uc_sync_multiple() API that can be called in the above circumstances and allows sync to work for every invocation. CC: Jiri Pirko Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 + net/core/dev_addr_lists.c | 210 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 171 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 53d3939358a..623b57b5219 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -209,6 +209,7 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 bool global_use; + int sync_cnt; int refcount; int synced; struct rcu_head rcu_head; @@ -2627,6 +2628,7 @@ extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); extern int dev_uc_sync(struct net_device *to, struct net_device *from); +extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); extern void dev_uc_unsync(struct net_device *to, struct net_device *from); extern void dev_uc_flush(struct net_device *dev); extern void dev_uc_init(struct net_device *dev); @@ -2638,6 +2640,7 @@ extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); extern int dev_mc_sync(struct net_device *to, struct net_device *from); +extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_flush(struct net_device *dev); extern void dev_mc_init(struct net_device *dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index abdc9e6ef33..c013f38482a 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, 
const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, + bool sync) { struct netdev_hw_addr *ha; int alloc_size; @@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = 0; + ha->synced = sync; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; @@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, else ha->global_use = true; } + if (sync) { + if (ha->synced) + return 0; + else + ha->synced = true; + } ha->refcount++; return 0; } } - return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); + return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, + sync); } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, + struct netdev_hw_addr *ha, bool global, + bool sync) +{ + if (global && !ha->global_use) + return -ENOENT; + + if (sync && !ha->synced) + return -ENOENT; + + if (global) + ha->global_use = false; + + if (sync) + ha->synced = false; + + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + kfree_rcu(ha, rcu_head); + list->count--; + return 0; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && - (ha->type == addr_type || !addr_type)) { - if (global) { - if (!ha->global_use) - break; - else - ha->global_use = false; - } - if (--ha->refcount) - return 0; - list_del_rcu(&ha->list); - kfree_rcu(ha, rcu_head); - list->count--; - return 0; - } + (ha->type == addr_type || !addr_type)) + return __hw_addr_del_entry(list, ha, global, sync); } return -ENOENT; } @@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return err; + ha->sync_cnt++; + ha->refcount++; + + return 0; +} + +static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return; + ha->sync_cnt--; + __hw_addr_del_entry(from_list, ha, false, true); +} + +static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + int err = 0; + struct 
netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (ha->sync_cnt == ha->refcount) { + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); + } else { + err = __hw_addr_sync_one(to_list, ha, addr_len); + if (err) + break; + } + } + return err; } int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, @@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_del_multiple); +/* This function only works where there is a strict 1-1 relationship + * between the source and destination of the sync. If you ever need to + * sync addresses to more than one destination, you need to use + * __hw_addr_sync_multiple(). + */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) @@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (!ha->synced) { - err = __hw_addr_add(to_list, ha->addr, - addr_len, ha->type); + if (!ha->sync_cnt) { + err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; - ha->synced++; - ha->refcount++; - } else if (ha->refcount == 1) { - __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - __hw_addr_del(from_list, ha->addr, addr_len, ha->type); - } + } else if (ha->refcount == 1) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } @@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (ha->synced) { - __hw_addr_del(to_list, ha->addr, - addr_len, ha->type); - ha->synced--; - __hw_addr_del(from_list, ha->addr, - addr_len, ha->type); - } + if (ha->sync_cnt) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); @@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST, true); + NETDEV_HW_ADDR_T_UNICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del); * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. + * function of layered software devices. This function assumes that + * addresses will only ever be synced to the @to device and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { @@ -487,6 +553,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_uc_sync); +/** + * dev_uc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have been deleted from the source. The source device + * must be locked by netif_addr_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. It allows for a single source + * device to be synced to multiple destination devices.
+ */ +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_uc_sync_multiple); + /** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device @@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, true); + NETDEV_HW_ADDR_T_MULTICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -678,6 +774,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_mc_sync); +/** + * dev_mc_sync_multiple - Synchronize device's multicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_addr_lock_bh. + * + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. It allows for a single + * source device to be synced to multiple destination devices. + */ +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_mc_sync_multiple); + /** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device -- cgit v1.2.3-70-g09d2 From 37799e52a29af2268d1fbe18908a0d6b9f68af88 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2013 14:02:26 +0100 Subject: mac80211: unify CSA action frame/beacon processing CSA action frame content should be processed as variable IEs rather than fixed to make it extensible. Unify the code and process them just like CSA in beacons to make it easier to extend for HT/VHT.
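As an aside, a minimal userspace sketch of what "processing as variable IEs" means in practice: walk id/length/value triples and pick out the element of interest. The element ID, struct layout and helper name below are simplified illustrations only, not the mac80211 parser.

#include <stdint.h>
#include <stdio.h>

#define EID_CHANNEL_SWITCH 37 /* Channel Switch Announcement element ID */

struct chansw_body {
	uint8_t mode;       /* 1: stop transmitting until the switch */
	uint8_t new_ch_num; /* channel to move to */
	uint8_t count;      /* TBTTs until the switch */
};

/* Walk a buffer of information elements and return the CSA body, if any. */
static const struct chansw_body *find_chansw(const uint8_t *ies, size_t len)
{
	while (len >= 2) {
		uint8_t id = ies[0], elen = ies[1];

		if (elen + 2u > len)
			break; /* truncated element, stop parsing */
		if (id == EID_CHANNEL_SWITCH && elen == sizeof(struct chansw_body))
			return (const struct chansw_body *)(ies + 2);
		ies += elen + 2u;
		len -= elen + 2u;
	}
	return NULL;
}

int main(void)
{
	/* fabricated IE buffer: one CSA element (mode 1, channel 36, 5 TBTTs) */
	uint8_t ies[] = { EID_CHANNEL_SWITCH, 3, 1, 36, 5 };
	const struct chansw_body *csa = find_chansw(ies, sizeof(ies));

	if (csa)
		printf("switch to channel %u in %u TBTTs (mode %u)\n",
		       csa->new_ch_num, csa->count, csa->mode);
	return 0;
}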
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 +-- net/mac80211/ieee80211_i.h | 4 --- net/mac80211/mlme.c | 71 ++++++++++++++++++++++++++++------------------ net/mac80211/rx.c | 4 --- 4 files changed, 44 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e46fea8b972..8f80b3a9350 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -840,9 +840,7 @@ struct ieee80211_mgmt { } __packed wme_action; struct{ u8 action_code; - u8 element_id; - u8 length; - struct ieee80211_channel_sw_ie sw_elem; + u8 variable[0]; } __packed chan_switch; struct{ u8 action_code; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d5dcbf17bb..373460f9c06 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1252,10 +1252,6 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void -ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - const struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2a2c4535449..ade3cd6c337 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1020,33 +1020,37 @@ static void ieee80211_chswitch_timer(unsigned long data) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); } -void +static void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - const struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, u64 timestamp) + u64 timestamp, struct ieee802_11_elems *elems) { - struct cfg80211_bss *cbss = - container_of((void *)bss, struct cfg80211_bss, priv); - struct ieee80211_channel *new_ch; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, - cbss->channel->band); + struct cfg80211_bss *cbss = ifmgd->associated; + struct ieee80211_bss *bss; + struct ieee80211_channel *new_ch; + int new_freq; struct ieee80211_chanctx *chanctx; ASSERT_MGD_MTX(ifmgd); - if (!ifmgd->associated) + if (!cbss) return; if (sdata->local->scanning) return; - /* Disregard subsequent beacons if we are already running a timer - processing a CSA */ - + /* disregard subsequent announcements if we are already processing */ if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; + if (!elems->ch_switch_ie) + return; + + bss = (void *)cbss->priv; + + new_freq = ieee80211_channel_to_frequency( + elems->ch_switch_ie->new_ch_num, + cbss->channel->band); new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { sdata_info(sdata, @@ -1086,7 +1090,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->local->csa_channel = new_ch; - if (sw_elem->mode) + if (elems->ch_switch_ie->mode) ieee80211_stop_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -1095,9 +1099,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, /* use driver's channel switch callback */ struct ieee80211_channel_switch ch_switch = { .timestamp = timestamp, - .block_tx = sw_elem->mode, + 
.block_tx = elems->ch_switch_ie->mode, .channel = new_ch, - .count = sw_elem->count, + .count = elems->ch_switch_ie->count, }; drv_channel_switch(sdata->local, &ch_switch); @@ -1105,11 +1109,11 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } /* channel switch handled in software */ - if (sw_elem->count <= 1) + if (elems->ch_switch_ie->count <= 1) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(sw_elem->count * + TU_TO_EXP_TIME(elems->ch_switch_ie->count * cbss->beacon_interval)); } @@ -2655,7 +2659,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (bss) ieee80211_rx_bss_put(local, bss); - if (!sdata->u.mgd.associated) + if (!sdata->u.mgd.associated || + !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) return; if (need_ps) { @@ -2664,10 +2669,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - if (elems->ch_switch_ie && - memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0) - ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie, - bss, rx_status->mactime); + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); } @@ -3061,14 +3063,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); break; case IEEE80211_STYPE_ACTION: - switch (mgmt->u.action.category) { - case WLAN_CATEGORY_SPECTRUM_MGMT: + if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { + struct ieee802_11_elems elems; + int ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + u.action.u.chan_switch.variable); + + if (ies_len < 0) + break; + + ieee802_11_parse_elems( + mgmt->u.action.u.chan_switch.variable, + ies_len, &elems); + + if (elems.parse_error) + break; + ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + rx_status->mactime, + &elems); } + break; } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5168f89c754..e9825f15c14 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2507,10 +2507,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_process_measurement_req(sdata, mgmt, len); goto handled; case WLAN_ACTION_SPCT_CHL_SWITCH: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch))) - break; - if (sdata->vif.type != NL80211_IFTYPE_STATION) break; -- cgit v1.2.3-70-g09d2 From 1ce3e82b0eb472161313183be0033e46d5c4bbaf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Aug 2012 17:00:55 +0200 Subject: cfg80211: add ieee80211_operating_class_to_band This function converts a (global only!) operating class to an internal band identifier. This will be needed for extended channel switch support. 
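For illustration only, a standalone userspace sketch of the mapping this helper performs, mirroring the class ranges in the net/wireless/util.c hunk below; the enum and function names here are placeholders, not the cfg80211 API.

#include <stdbool.h>
#include <stdio.h>

enum band { BAND_2GHZ, BAND_5GHZ };

/* Same class ranges as the util.c hunk below; names are placeholders. */
static bool operating_class_to_band(unsigned char operating_class, enum band *band)
{
	if (operating_class == 112 ||
	    (operating_class >= 115 && operating_class <= 127)) {
		*band = BAND_5GHZ;
		return true;
	}
	if (operating_class >= 81 && operating_class <= 84) {
		*band = BAND_2GHZ;
		return true;
	}
	return false; /* not one of the handled global operating classes */
}

int main(void)
{
	enum band band;
	unsigned char classes[] = { 81, 115, 200 };

	for (unsigned i = 0; i < sizeof(classes); i++) {
		if (operating_class_to_band(classes[i], &band))
			printf("class %u -> %s\n", classes[i],
			       band == BAND_5GHZ ? "5 GHz" : "2.4 GHz");
		else
			printf("class %u -> not a known global class\n", classes[i]);
	}
	return 0;
}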
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/wireless/util.c | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 57870b64697..dff96d8cafc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4024,6 +4024,17 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef); +/** + * ieee80211_operating_class_to_band - convert operating class to band + * + * @operating_class: the operating class to convert + * @band: band pointer to fill + * + * Returns %true if the conversion was successful, %false otherwise. + */ +bool ieee80211_operating_class_to_band(u8 operating_class, + enum ieee80211_band *band); + /* * cfg80211_tdls_oper_request - request userspace to perform TDLS operation * @dev: the device on which the operation is requested diff --git a/net/wireless/util.c b/net/wireless/util.c index 37a56ee1e1e..3d8a1334f4a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); +bool ieee80211_operating_class_to_band(u8 operating_class, + enum ieee80211_band *band) +{ + switch (operating_class) { + case 112: + case 115 ... 127: + *band = IEEE80211_BAND_5GHZ; + return true; + case 81: + case 82: + case 83: + case 84: + *band = IEEE80211_BAND_2GHZ; + return true; + } + + return false; +} +EXPORT_SYMBOL(ieee80211_operating_class_to_band); + int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int) { -- cgit v1.2.3-70-g09d2 From b4f286a1c0ad0b84c2d502b354d4d98d5a86c64b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2013 14:13:58 +0100 Subject: mac80211: support extended channel switch Support extended channel switch when the operating class is one of the global operating classes as defined in Annex E of 802.11-2012. If it isn't, disconnect from the AP instead. 
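As an illustration of the element involved, a tiny userspace sketch that decodes an ECSA element body laid out like the struct ieee80211_ext_chansw_ie added below; the sample bytes are made up and this is not the mac80211 handling.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same four one-octet fields as struct ieee80211_ext_chansw_ie below. */
struct ext_chansw_body {
	uint8_t mode;                /* 1: stop transmitting until the switch */
	uint8_t new_operating_class; /* global operating class of the target */
	uint8_t new_ch_num;          /* channel number within that class */
	uint8_t count;               /* TBTTs until the switch */
};

int main(void)
{
	/* fabricated element body: mode 1, class 115 (5 GHz), channel 40, 10 TBTTs */
	const uint8_t body[4] = { 1, 115, 40, 10 };
	struct ext_chansw_body ie;

	memcpy(&ie, body, sizeof(ie)); /* kernel code casts in place; memcpy is portable */
	printf("mode=%u class=%u channel=%u count=%u\n",
	       ie.mode, ie.new_operating_class, ie.new_ch_num, ie.count);
	return 0;
}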
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 12 ++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 77 ++++++++++++++++++++++++++++++---------------- net/mac80211/util.c | 7 +++++ 4 files changed, 71 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8f80b3a9350..2a10acc65a5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -672,6 +672,18 @@ struct ieee80211_channel_sw_ie { u8 count; } __packed; +/** + * struct ieee80211_ext_chansw_ie + * + * This structure represents the "Extended Channel Switch Announcement element" + */ +struct ieee80211_ext_chansw_ie { + u8 mode; + u8 new_operating_class; + u8 new_ch_num; + u8 count; +} __packed; + /** * struct ieee80211_tim * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 373460f9c06..10c3180b165 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1178,6 +1178,7 @@ struct ieee802_11_elems { const u8 *perr; const struct ieee80211_rann_ie *rann; const struct ieee80211_channel_sw_ie *ch_switch_ie; + const struct ieee80211_ext_chansw_ie *ext_chansw_ie; const u8 *country_elem; const u8 *pwr_constr_elem; const struct ieee80211_timeout_interval_ie *timeout_int; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ade3cd6c337..bc6f87edc62 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1024,56 +1024,79 @@ static void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, u64 timestamp, struct ieee802_11_elems *elems) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss = ifmgd->associated; struct ieee80211_bss *bss; struct ieee80211_channel *new_ch; - int new_freq; struct ieee80211_chanctx *chanctx; + enum ieee80211_band new_band; + int new_freq; + u8 new_chan_no; + u8 count; + u8 mode; ASSERT_MGD_MTX(ifmgd); if (!cbss) return; - if (sdata->local->scanning) + if (local->scanning) return; /* disregard subsequent announcements if we are already processing */ if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; - if (!elems->ch_switch_ie) + if (elems->ext_chansw_ie) { + if (!ieee80211_operating_class_to_band( + elems->ext_chansw_ie->new_operating_class, + &new_band)) { + sdata_info(sdata, + "cannot understand ECSA IE operating class %d, disconnecting\n", + elems->ext_chansw_ie->new_operating_class); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + } + new_chan_no = elems->ext_chansw_ie->new_ch_num; + count = elems->ext_chansw_ie->count; + mode = elems->ext_chansw_ie->mode; + } else if (elems->ch_switch_ie) { + new_band = cbss->channel->band; + new_chan_no = elems->ch_switch_ie->new_ch_num; + count = elems->ch_switch_ie->count; + mode = elems->ch_switch_ie->mode; + } else { + /* nothing here we understand */ return; + } bss = (void *)cbss->priv; - new_freq = ieee80211_channel_to_frequency( - elems->ch_switch_ie->new_ch_num, - cbss->channel->band); - new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); + new_ch = ieee80211_get_channel(local->hw.wiphy, new_freq); if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { sdata_info(sdata, "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", ifmgd->associated->bssid, new_freq); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); return; } 
ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; - if (sdata->local->use_chanctx) { + if (local->use_chanctx) { sdata_info(sdata, "not handling channel switch with channel contexts\n"); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); return; } - mutex_lock(&sdata->local->chanctx_mtx); + mutex_lock(&local->chanctx_mtx); if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) { - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); return; } chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), @@ -1081,40 +1104,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (chanctx->refcount > 1) { sdata_info(sdata, "channel switch with multiple interfaces on the same channel, disconnecting\n"); - ieee80211_queue_work(&sdata->local->hw, + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); return; } - mutex_unlock(&sdata->local->chanctx_mtx); + mutex_unlock(&local->chanctx_mtx); - sdata->local->csa_channel = new_ch; + local->csa_channel = new_ch; - if (elems->ch_switch_ie->mode) - ieee80211_stop_queues_by_reason(&sdata->local->hw, + if (mode) + ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - if (sdata->local->ops->channel_switch) { + if (local->ops->channel_switch) { /* use driver's channel switch callback */ struct ieee80211_channel_switch ch_switch = { .timestamp = timestamp, - .block_tx = elems->ch_switch_ie->mode, + .block_tx = mode, .channel = new_ch, - .count = elems->ch_switch_ie->count, + .count = count, }; - drv_channel_switch(sdata->local, &ch_switch); + drv_channel_switch(local, &ch_switch); return; } /* channel switch handled in software */ - if (elems->ch_switch_ie->count <= 1) - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); + if (count <= 1) + ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(elems->ch_switch_ie->count * - cbss->beacon_interval)); + TU_TO_EXP_TIME(count * cbss->beacon_interval)); } static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, @@ -2629,6 +2651,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; + lockdep_assert_held(&sdata->u.mgd.mtx); + if ((sdata->u.mgd.associated && ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || (sdata->u.mgd.assoc_data && @@ -2670,6 +2694,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); + } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1d6217ac3ba..e4a6d559372 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -863,6 +863,13 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } elems->ch_switch_ie = (void *)pos; break; + case WLAN_EID_EXT_CHANSWITCH_ANN: + if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { + elem_parse_failed = true; + break; + } + elems->ext_chansw_ie = (void *)pos; + break; case WLAN_EID_COUNTRY: elems->country_elem = pos; elems->country_elem_len = elen; -- cgit v1.2.3-70-g09d2 From 85220d71bf3ca1ba9129e0744247ae5f61bec559 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2013 18:29:27 +0100 Subject: mac80211: support secondary channel offset in CSA Add support for the secondary channel offset IE in 
channel switch announcements. This is necessary for proper handling of CSA on HT access points. For this to work it is also necessary to convert everything here to use chandef structs instead of just channels. The driver updates aren't really correct though. In particular, the TI wl18xx driver update can't possibly be right since it just ignores the new channel width for lack of firmware API. Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/4965-mac.c | 32 ++++++------- drivers/net/wireless/iwlegacy/4965.c | 2 +- drivers/net/wireless/iwlwifi/dvm/devices.c | 10 ++-- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 20 ++++++-- drivers/net/wireless/iwlwifi/dvm/rxon.c | 2 +- drivers/net/wireless/ti/wl12xx/cmd.c | 2 +- drivers/net/wireless/ti/wl18xx/cmd.c | 6 +-- include/linux/ieee80211.h | 11 +++++ include/net/mac80211.h | 4 +- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/mlme.c | 71 +++++++++++++++++++++++------ net/mac80211/trace.h | 8 ++-- net/mac80211/util.c | 8 ++++ 13 files changed, 125 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index c092fcbbe96..cb5882ea5f3 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -6057,7 +6057,7 @@ il4965_mac_channel_switch(struct ieee80211_hw *hw, struct il_priv *il = hw->priv; const struct il_channel_info *ch_info; struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = ch_switch->channel; + struct ieee80211_channel *channel = ch_switch->chandef.chan; struct il_ht_config *ht_conf = &il->current_ht_config; u16 ch; @@ -6094,23 +6094,21 @@ il4965_mac_channel_switch(struct ieee80211_hw *hw, il->current_ht_config.smps = conf->smps_mode; /* Configure HT40 channels */ - il->ht.enabled = conf_is_ht(conf); - if (il->ht.enabled) { - if (conf_is_ht40_minus(conf)) { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_BELOW; - il->ht.is_40mhz = true; - } else if (conf_is_ht40_plus(conf)) { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_ABOVE; - il->ht.is_40mhz = true; - } else { - il->ht.extension_chan_offset = - IEEE80211_HT_PARAM_CHA_SEC_NONE; - il->ht.is_40mhz = false; - } - } else + switch (cfg80211_get_chandef_type(&ch_switch->chandef)) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: il->ht.is_40mhz = false; + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + break; + case NL80211_CHAN_HT40MINUS: + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + il->ht.is_40mhz = true; + break; + case NL80211_CHAN_HT40PLUS: + il->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + il->ht.is_40mhz = true; + break; + } if ((le16_to_cpu(il->staging.channel) != ch)) il->staging.flags = 0; diff --git a/drivers/net/wireless/iwlegacy/4965.c b/drivers/net/wireless/iwlegacy/4965.c index 91eb2d07fdb..777a578294b 100644 --- a/drivers/net/wireless/iwlegacy/4965.c +++ b/drivers/net/wireless/iwlegacy/4965.c @@ -1493,7 +1493,7 @@ il4965_hw_channel_switch(struct il_priv *il, cmd.band = band; cmd.expect_beacon = 0; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; cmd.channel = cpu_to_le16(ch); cmd.rxon_flags = il->staging.flags; cmd.rxon_filter_flags = il->staging.filter_flags; diff --git a/drivers/net/wireless/iwlwifi/dvm/devices.c b/drivers/net/wireless/iwlwifi/dvm/devices.c index 15cca2ef929..c48907c8ab4 100644 --- a/drivers/net/wireless/iwlwifi/dvm/devices.c +++ 
b/drivers/net/wireless/iwlwifi/dvm/devices.c @@ -379,7 +379,7 @@ static int iwl5000_hw_channel_switch(struct iwl_priv *priv, }; cmd.band = priv->band == IEEE80211_BAND_2GHZ; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; IWL_DEBUG_11H(priv, "channel switch from %d to %d\n", ctx->active.channel, ch); cmd.channel = cpu_to_le16(ch); @@ -414,7 +414,8 @@ static int iwl5000_hw_channel_switch(struct iwl_priv *priv, } IWL_DEBUG_11H(priv, "uCode time for the switch is 0x%x\n", cmd.switch_time); - cmd.expect_beacon = ch_switch->channel->flags & IEEE80211_CHAN_RADAR; + cmd.expect_beacon = + ch_switch->chandef.chan->flags & IEEE80211_CHAN_RADAR; return iwl_dvm_send_cmd(priv, &hcmd); } @@ -540,7 +541,7 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, hcmd.data[0] = cmd; cmd->band = priv->band == IEEE80211_BAND_2GHZ; - ch = ch_switch->channel->hw_value; + ch = ch_switch->chandef.chan->hw_value; IWL_DEBUG_11H(priv, "channel switch from %u to %u\n", ctx->active.channel, ch); cmd->channel = cpu_to_le16(ch); @@ -575,7 +576,8 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, } IWL_DEBUG_11H(priv, "uCode time for the switch is 0x%x\n", cmd->switch_time); - cmd->expect_beacon = ch_switch->channel->flags & IEEE80211_CHAN_RADAR; + cmd->expect_beacon = + ch_switch->chandef.chan->flags & IEEE80211_CHAN_RADAR; err = iwl_dvm_send_cmd(priv, &hcmd); kfree(cmd); diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index a7294fa4d7e..2dc101fe0d2 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -967,7 +967,7 @@ static void iwlagn_mac_channel_switch(struct ieee80211_hw *hw, { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); struct ieee80211_conf *conf = &hw->conf; - struct ieee80211_channel *channel = ch_switch->channel; + struct ieee80211_channel *channel = ch_switch->chandef.chan; struct iwl_ht_config *ht_conf = &priv->current_ht_config; /* * MULTI-FIXME @@ -1005,11 +1005,21 @@ static void iwlagn_mac_channel_switch(struct ieee80211_hw *hw, priv->current_ht_config.smps = conf->smps_mode; /* Configure HT40 channels */ - ctx->ht.enabled = conf_is_ht(conf); - if (ctx->ht.enabled) - iwlagn_config_ht40(conf, ctx); - else + switch (cfg80211_get_chandef_type(&ch_switch->chandef)) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: ctx->ht.is_40mhz = false; + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + break; + case NL80211_CHAN_HT40MINUS: + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + ctx->ht.is_40mhz = true; + break; + case NL80211_CHAN_HT40PLUS: + ctx->ht.extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + ctx->ht.is_40mhz = true; + break; + } if ((le16_to_cpu(ctx->staging.channel) != ch)) ctx->staging.flags = 0; diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 085c589e714..acbb50b5f1e 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1160,7 +1160,7 @@ int iwlagn_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx) } void iwlagn_config_ht40(struct ieee80211_conf *conf, - struct iwl_rxon_context *ctx) + struct iwl_rxon_context *ctx) { if (conf_is_ht40_minus(conf)) { ctx->ht.extension_chan_offset = diff --git a/drivers/net/wireless/ti/wl12xx/cmd.c b/drivers/net/wireless/ti/wl12xx/cmd.c index 7dc9f965037..7485dbae8c4 100644 --- a/drivers/net/wireless/ti/wl12xx/cmd.c +++ 
b/drivers/net/wireless/ti/wl12xx/cmd.c @@ -301,7 +301,7 @@ int wl12xx_cmd_channel_switch(struct wl1271 *wl, } cmd->role_id = wlvif->role_id; - cmd->channel = ch_switch->channel->hw_value; + cmd->channel = ch_switch->chandef.chan->hw_value; cmd->switch_time = ch_switch->count; cmd->stop_tx = ch_switch->block_tx; diff --git a/drivers/net/wireless/ti/wl18xx/cmd.c b/drivers/net/wireless/ti/wl18xx/cmd.c index 1d1f6cc7a50..7649c75cd68 100644 --- a/drivers/net/wireless/ti/wl18xx/cmd.c +++ b/drivers/net/wireless/ti/wl18xx/cmd.c @@ -42,11 +42,11 @@ int wl18xx_cmd_channel_switch(struct wl1271 *wl, } cmd->role_id = wlvif->role_id; - cmd->channel = ch_switch->channel->hw_value; + cmd->channel = ch_switch->chandef.chan->hw_value; cmd->switch_time = ch_switch->count; cmd->stop_tx = ch_switch->block_tx; - switch (ch_switch->channel->band) { + switch (ch_switch->chandef.chan->band) { case IEEE80211_BAND_2GHZ: cmd->band = WLCORE_BAND_2_4GHZ; break; @@ -55,7 +55,7 @@ int wl18xx_cmd_channel_switch(struct wl1271 *wl, break; default: wl1271_error("invalid channel switch band: %d", - ch_switch->channel->band); + ch_switch->chandef.chan->band); ret = -EINVAL; goto out_free; } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2a10acc65a5..95621528436 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -684,6 +684,16 @@ struct ieee80211_ext_chansw_ie { u8 count; } __packed; +/** + * struct ieee80211_sec_chan_offs_ie - secondary channel offset IE + * @sec_chan_offs: secondary channel offset, uses IEEE80211_HT_PARAM_CHA_SEC_* + * values here + * This structure represents the "Secondary Channel Offset element" + */ +struct ieee80211_sec_chan_offs_ie { + u8 sec_chan_offs; +} __packed; + /** * struct ieee80211_tim * @@ -1648,6 +1658,7 @@ enum ieee80211_eid { WLAN_EID_HT_CAPABILITY = 45, WLAN_EID_HT_OPERATION = 61, + WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62, WLAN_EID_RSN = 48, WLAN_EID_MMIE = 76, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0dde213dd3b..9ff10b33b71 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1017,13 +1017,13 @@ struct ieee80211_conf { * the driver passed into mac80211. * @block_tx: Indicates whether transmission must be blocked before the * scheduled channel switch, as indicated by the AP. 
- * @channel: the new channel to switch to + * @chandef: the new channel to switch to * @count: the number of TBTT's until the channel switch event */ struct ieee80211_channel_switch { u64 timestamp; bool block_tx; - struct ieee80211_channel *channel; + struct cfg80211_chan_def chandef; u8 count; }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 10c3180b165..8f240c0ec30 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1019,7 +1019,7 @@ struct ieee80211_local { enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - struct ieee80211_channel *csa_channel; + struct cfg80211_chan_def csa_chandef; /* For backward compatibility only -- do not use */ struct cfg80211_chan_def _oper_chandef; @@ -1183,6 +1183,7 @@ struct ieee802_11_elems { const u8 *pwr_constr_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; + const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; /* length of them, respectively */ u8 ssid_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bc6f87edc62..bd581a80e4b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -289,6 +289,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } else { /* 40 MHz (and 80 MHz) must be supported for VHT */ ret = IEEE80211_STA_DISABLE_VHT; + /* also mark 40 MHz disabled */ + ret |= IEEE80211_STA_DISABLE_40MHZ; goto out; } @@ -964,16 +966,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - /* - * FIXME: Here we are downgrading to NL80211_CHAN_WIDTH_20_NOHT - * and don't adjust our ht/vht settings - * This is wrong - we should behave according to the CSA params - */ - local->_oper_chandef.chan = local->csa_channel; - local->_oper_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - local->_oper_chandef.center_freq1 = - local->_oper_chandef.chan->center_freq; - local->_oper_chandef.center_freq2 = 0; + local->_oper_chandef = local->csa_chandef; if (!local->ops->channel_switch) { /* call "hw_config" only if doing sw channel switch */ @@ -1028,13 +1021,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss = ifmgd->associated; struct ieee80211_bss *bss; - struct ieee80211_channel *new_ch; struct ieee80211_chanctx *chanctx; enum ieee80211_band new_band; int new_freq; u8 new_chan_no; u8 count; u8 mode; + struct cfg80211_chan_def new_chandef = {}; + int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); @@ -1048,6 +1042,19 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + /* if HT is enabled and the IE not present, it's still HT */ + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + if (elems->sec_chan_offs) + secondary_channel_offset = + elems->sec_chan_offs->sec_chan_offs; + } + + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + (secondary_channel_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE || + secondary_channel_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW)) + secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + if (elems->ext_chansw_ie) { if (!ieee80211_operating_class_to_band( elems->ext_chansw_ie->new_operating_class, @@ -1074,8 +1081,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, bss = (void *)cbss->priv; new_freq = 
ieee80211_channel_to_frequency(new_chan_no, new_band); - new_ch = ieee80211_get_channel(local->hw.wiphy, new_freq); - if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { + new_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + if (!new_chandef.chan || + new_chandef.chan->flags & IEEE80211_CHAN_DISABLED) { sdata_info(sdata, "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", ifmgd->associated->bssid, new_freq); @@ -1084,6 +1092,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } + switch (secondary_channel_offset) { + default: + /* secondary_channel_offset was present but is invalid */ + case IEEE80211_HT_PARAM_CHA_SEC_NONE: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT20); + break; + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT40PLUS); + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_HT40MINUS); + break; + case -1: + cfg80211_chandef_create(&new_chandef, new_chandef.chan, + NL80211_CHAN_NO_HT); + break; + } + + if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef, + IEEE80211_CHAN_DISABLED)) { + sdata_info(sdata, + "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifmgd->associated->bssid, new_freq, + new_chandef.width, new_chandef.center_freq1, + new_chandef.center_freq2); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + return; + } + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; if (local->use_chanctx) { @@ -1111,7 +1152,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&local->chanctx_mtx); - local->csa_channel = new_ch; + local->csa_chandef = new_chandef; if (mode) ieee80211_stop_queues_by_reason(&local->hw, @@ -1123,7 +1164,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch ch_switch = { .timestamp = timestamp, .block_tx = mode, - .channel = new_ch, + .chandef = new_chandef, .count = count, }; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8286dcef228..c215fafd7a2 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -990,23 +990,23 @@ TRACE_EVENT(drv_channel_switch, TP_STRUCT__entry( LOCAL_ENTRY + CHANDEF_ENTRY __field(u64, timestamp) __field(bool, block_tx) - __field(u16, freq) __field(u8, count) ), TP_fast_assign( LOCAL_ASSIGN; + CHANDEF_ASSIGN(&ch_switch->chandef) __entry->timestamp = ch_switch->timestamp; __entry->block_tx = ch_switch->block_tx; - __entry->freq = ch_switch->channel->center_freq; __entry->count = ch_switch->count; ), TP_printk( - LOCAL_PR_FMT " new freq:%u count:%d", - LOCAL_PR_ARG, __entry->freq, __entry->count + LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d", + LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count ) ); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e4a6d559372..155056c90ed 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -716,6 +716,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, case WLAN_EID_COUNTRY: case WLAN_EID_PWR_CONSTRAINT: case WLAN_EID_TIMEOUT_INTERVAL: + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: if (test_bit(id, seen_elems)) { elems->parse_error = true; left -= elen; @@ -870,6 +871,13 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } elems->ext_chansw_ie = (void *)pos; break; + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + if (elen != sizeof(struct 
ieee80211_sec_chan_offs_ie)) { + elem_parse_failed = true; + break; + } + elems->sec_chan_offs = (void *)pos; + break; case WLAN_EID_COUNTRY: elems->country_elem = pos; elems->country_elem_len = elen; -- cgit v1.2.3-70-g09d2 From 1b3a2e494bc793445f576c5476e9767cf7621684 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2013 15:17:18 +0100 Subject: mac80211: handle extended channel switch announcement Handle the (public) extended channel switch announcement action frames. Parts of the data in these frames isn't really in IEs, but put it into the elems struct anyway to simplify the handling. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 ++++++ net/mac80211/mlme.c | 31 +++++++++++++++++++++++++++---- net/mac80211/rx.c | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 95621528436..ce07161c873 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -864,6 +864,11 @@ struct ieee80211_mgmt { u8 action_code; u8 variable[0]; } __packed chan_switch; + struct{ + u8 action_code; + struct ieee80211_ext_chansw_ie data; + u8 variable[0]; + } __packed ext_chan_switch; struct{ u8 action_code; u8 dialog_token; @@ -1816,6 +1821,7 @@ enum ieee80211_key_len { /* Public action codes */ enum ieee80211_pub_actioncode { + WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, WLAN_PUB_ACTION_TDLS_DISCOVER_RES = 14, }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bd581a80e4b..c53aedb47a6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3100,6 +3100,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, enum rx_mgmt_action rma = RX_MGMT_NONE; u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; + struct ieee802_11_elems elems; + int ies_len; rx_status = (struct ieee80211_rx_status *) skb->cb; mgmt = (struct ieee80211_mgmt *) skb->data; @@ -3130,10 +3132,9 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { - struct ieee802_11_elems elems; - int ies_len = skb->len - - offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + u.action.u.chan_switch.variable); if (ies_len < 0) break; @@ -3145,6 +3146,28 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (elems.parse_error) break; + ieee80211_sta_process_chanswitch(sdata, + rx_status->mactime, + &elems); + } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + ies_len = skb->len - + offsetof(struct ieee80211_mgmt, + u.action.u.ext_chan_switch.variable); + + if (ies_len < 0) + break; + + ieee802_11_parse_elems( + mgmt->u.action.u.ext_chan_switch.variable, + ies_len, &elems); + + if (elems.parse_error) + break; + + /* for the handling code pretend this was also an IE */ + elems.ext_chansw_ie = + &mgmt->u.action.u.ext_chan_switch.data; + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, &elems); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e9825f15c14..643fcf7c9dc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2424,6 +2424,22 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; + case WLAN_CATEGORY_PUBLIC: + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + if (!rx->sta) + break; + if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid)) + break; + if 
(mgmt->u.action.u.ext_chan_switch.action_code != + WLAN_PUB_ACTION_EXT_CHANSW_ANN) + break; + if (len < offsetof(struct ieee80211_mgmt, + u.action.u.ext_chan_switch.variable)) + goto invalid; + goto queue; case WLAN_CATEGORY_VHT: if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && -- cgit v1.2.3-70-g09d2 From b2e506bfc4d752b68a0ccaae1e977898263eba4c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2013 14:54:16 +0100 Subject: mac80211: parse VHT channel switch IEs VHT introduces multiple IEs that need to be parsed for a wide bandwidth channel switch. Two are (currently) needed in mac80211: * wide bandwidth channel switch element * channel switch wrapper element The former is contained in the latter for beacons and probe responses, but not for the spectrum management action frames so the IE parser needs a new argument to differentiate them. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 7 ++++--- net/mac80211/mesh.c | 4 ++-- net/mac80211/mesh_hwmp.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/mac80211/mlme.c | 16 ++++++++-------- net/mac80211/scan.c | 2 +- net/mac80211/util.c | 36 +++++++++++++++++++++++++++++++++++- 9 files changed, 63 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce07161c873..06b0ed0154a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -694,6 +694,14 @@ struct ieee80211_sec_chan_offs_ie { u8 sec_chan_offs; } __packed; +/** + * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE + */ +struct ieee80211_wide_bw_chansw_ie { + u8 new_channel_width; + u8 new_center_freq_seg0, new_center_freq_seg1; +} __packed; + /** * struct ieee80211_tim * @@ -1698,6 +1706,8 @@ enum ieee80211_eid { WLAN_EID_VHT_CAPABILITY = 191, WLAN_EID_VHT_OPERATION = 192, WLAN_EID_OPMODE_NOTIF = 199, + WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194, + WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196, /* 802.11ad */ WLAN_EID_NON_TX_BSSID_CAP = 83, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b7bf6d76f1d..170f9a7fa31 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -914,7 +914,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8f240c0ec30..f4a65a340a5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1179,6 +1179,7 @@ struct ieee802_11_elems { const struct ieee80211_rann_ie *rann; const struct ieee80211_channel_sw_ie *ch_switch_ie; const struct ieee80211_ext_chansw_ie *ext_chansw_ie; + const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const u8 *country_elem; const u8 *pwr_constr_elem; const struct ieee80211_timeout_interval_ie *timeout_int; @@ -1490,13 +1491,13 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, u64 filter, u32 crc); -static inline void ieee802_11_parse_elems(u8 *start, size_t len, +static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, struct ieee802_11_elems *elems) { - 
ieee802_11_parse_elems_crc(start, len, elems, 0, 0); + ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } u32 ieee80211_mandatory_rates(struct ieee80211_local *local, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 0acc2874d29..4b984765d62 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -838,7 +838,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - ieee802_11_parse_elems(pos, len - baselen, &elems); + ieee802_11_parse_elems(pos, len - baselen, false, &elems); /* 802.11-2012 10.1.4.3.2 */ if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && @@ -899,7 +899,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); /* ignore non-mesh or secure / unsecure mismatch */ if ((!elems.mesh_id || !elems.mesh_config) || diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index c82d5e6a24c..486819cd02c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -880,7 +880,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, - len - baselen, &elems); + len - baselen, false, &elems); if (elems.preq) { if (elems.preq_len != 37) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index cdd41835334..09bebed9941 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -687,7 +687,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, baseaddr += 4; baselen += 4; } - ieee802_11_parse_elems(baseaddr, len - baselen, &elems); + ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); if (!elems.peering) { mpl_dbg(sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c53aedb47a6..3e0421265bf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2203,7 +2203,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, u32 tx_flags = 0; pos = mgmt->u.auth.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (!elems.challenge) return; auth_data->expected_transaction = 4; @@ -2468,7 +2468,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (!elems.supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); @@ -2637,7 +2637,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && elems.timeout_int && @@ -2760,7 +2760,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, return; ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - &elems); + false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); @@ -2843,7 +2843,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ether_addr_equal(mgmt->bssid, 
ifmgd->assoc_data->bss->bssid)) { ieee802_11_parse_elems(mgmt->u.beacon.variable, - len - baselen, &elems); + len - baselen, false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); ifmgd->assoc_data->have_beacon = true; @@ -2953,7 +2953,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, - len - baselen, &elems, + len - baselen, false, &elems, care_about_ies, ncrc); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { @@ -3141,7 +3141,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems( mgmt->u.action.u.chan_switch.variable, - ies_len, &elems); + ies_len, true, &elems); if (elems.parse_error) break; @@ -3159,7 +3159,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems( mgmt->u.action.u.ext_chan_switch.variable, - ies_len, &elems); + ies_len, true, &elems); if (elems.parse_error) break; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 33fbf104569..99b103921a4 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -181,7 +181,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (baselen > skb->len) return; - ieee802_11_parse_elems(elements, skb->len - baselen, &elems); + ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems); channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 155056c90ed..3f87fa468b1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -661,7 +661,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_queue_delayed_work); -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, u64 filter, u32 crc) { @@ -669,6 +669,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, u8 *pos = start; bool calc_crc = filter != 0; DECLARE_BITMAP(seen_elems, 256); + const u8 *ie; bitmap_zero(seen_elems, 256); memset(elems, 0, sizeof(*elems)); @@ -717,6 +718,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, case WLAN_EID_PWR_CONSTRAINT: case WLAN_EID_TIMEOUT_INTERVAL: case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + /* + * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible + * that if the content gets bigger it might be needed more than once + */ if (test_bit(id, seen_elems)) { elems->parse_error = true; left -= elen; @@ -878,6 +884,34 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, } elems->sec_chan_offs = (void *)pos; break; + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + if (!action || + elen != sizeof(*elems->wide_bw_chansw_ie)) { + elem_parse_failed = true; + break; + } + elems->wide_bw_chansw_ie = (void *)pos; + break; + case WLAN_EID_CHANNEL_SWITCH_WRAPPER: + if (action) { + elem_parse_failed = true; + break; + } + /* + * This is a bit tricky, but as we only care about + * the wide bandwidth channel switch element, so + * just parse it out manually. 
+ */ + ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (ie) { + if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) + elems->wide_bw_chansw_ie = + (void *)(ie + 2); + else + elem_parse_failed = true; + } + break; case WLAN_EID_COUNTRY: elems->country_elem = pos; elems->country_elem_len = elen; -- cgit v1.2.3-70-g09d2 From 6bc8312f95f982c0a6f26e87d0a6c299a697ed53 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 15 Apr 2013 17:09:29 +0200 Subject: mac80211: VHT off-by-one NSS The number of VHT spatial streams (NSS) is found in: - s8 ieee80211_tx_rate.rate.idx[6:4] (tx - filled by rate control) - u8 ieee80211_rx_status.vht_nss (rx - filled by driver) Tx discriminates valid rates indexes with the sign bit and encodes NSS starting from 0 to 7 (note this matches some hw encodings e.g IWLMVM). Rx does not have the same constraints, and encodes NSS starting from 1 to 8 (note this matches what wireshark expects in the radiotap header). To handle ieee80211_tx_rate.rate.idx[6:4] ieee80211_rate_set_vht() and ieee80211_rate_get_vht_nss() assume their nss parameter and return value respectively runs from 0 to 7. ATM, there are only 2 users of these: cfg.c:sta_set_rate_info_t() and iwlwifi/mvm/tx.c:iwl_mvm_hwrate_to_tx_control(), but both assume nss runs from 1 to 8. This patch fixes this inconsistency by making ieee80211_rate_set_vht() and ieee80211_rate_get_vht_nss() handle an nss running from 1 to 8. Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9ff10b33b71..bc5d8184c48 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -601,8 +601,8 @@ static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate, u8 mcs, u8 nss) { WARN_ON(mcs & ~0xF); - WARN_ON(nss & ~0x7); - rate->idx = (nss << 4) | mcs; + WARN_ON((nss - 1) & ~0x7); + rate->idx = ((nss - 1) << 4) | mcs; } static inline u8 @@ -614,7 +614,7 @@ ieee80211_rate_get_vht_mcs(const struct ieee80211_tx_rate *rate) static inline u8 ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) { - return rate->idx >> 4; + return (rate->idx >> 4) + 1; } /** -- cgit v1.2.3-70-g09d2 From 43b5abe0640100a9e9424c91298c7993d443ffb7 Mon Sep 17 00:00:00 2001 From: Sascha Herrmann Date: Sun, 14 Apr 2013 22:33:28 +0000 Subject: at86rf230: add irq type configuration option Add option to at86rf230 platform data to configure the type of the interrupt used by the driver. The irq polarity of the device will be configured accordingly. Signed-off-by: Sascha Herrmann Signed-off-by: David S. 
Miller --- drivers/net/ieee802154/at86rf230.c | 47 +++++++++++++++++++++++++------------- include/linux/spi/at86rf230.h | 14 ++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index fc315ddea61..cf098889ba6 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -219,6 +219,9 @@ struct at86rf230_local { #define IRQ_PLL_UNL (1 << 1) #define IRQ_PLL_LOCK (1 << 0) +#define IRQ_ACTIVE_HIGH 0 +#define IRQ_ACTIVE_LOW 1 + #define STATE_P_ON 0x00 /* BUSY */ #define STATE_BUSY_RX 0x01 #define STATE_BUSY_TX 0x02 @@ -726,11 +729,16 @@ static irqreturn_t at86rf230_isr(int irq, void *data) return IRQ_HANDLED; } +static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol) +{ + return at86rf230_write_subreg(lp, SR_IRQ_POLARITY, pol); +} static int at86rf230_hw_init(struct at86rf230_local *lp) { + struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data; + int rc, irq_pol; u8 status; - int rc; rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status); if (rc) @@ -748,6 +756,16 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) dev_info(&lp->spi->dev, "Status: %02x\n", status); } + /* configure irq polarity, defaults to high active */ + if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) + irq_pol = IRQ_ACTIVE_LOW; + else + irq_pol = IRQ_ACTIVE_HIGH; + + rc = at86rf230_irq_polarity(lp, irq_pol); + if (rc) + return rc; + rc = at86rf230_write_subreg(lp, SR_IRQ_MASK, 0xff); /* IRQ_TRX_UR | * IRQ_CCA_ED | * IRQ_TRX_END | @@ -798,37 +816,36 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) return 0; } -static int at86rf230_fill_data(struct spi_device *spi) +static void at86rf230_fill_data(struct spi_device *spi) { struct at86rf230_local *lp = spi_get_drvdata(spi); struct at86rf230_platform_data *pdata = spi->dev.platform_data; - if (!pdata) { - dev_err(&spi->dev, "no platform_data\n"); - return -EINVAL; - } - lp->rstn = pdata->rstn; lp->slp_tr = pdata->slp_tr; lp->dig2 = pdata->dig2; - - return 0; } static int at86rf230_probe(struct spi_device *spi) { + struct at86rf230_platform_data *pdata; struct ieee802154_dev *dev; struct at86rf230_local *lp; u8 man_id_0, man_id_1; - int rc; + int rc, supported = 0; const char *chip; - int supported = 0; if (!spi->irq) { dev_err(&spi->dev, "no IRQ specified\n"); return -EINVAL; } + pdata = spi->dev.platform_data; + if (!pdata) { + dev_err(&spi->dev, "no platform_data\n"); + return -EINVAL; + } + dev = ieee802154_alloc_device(sizeof(*lp), &at86rf230_ops); if (!dev) return -ENOMEM; @@ -851,9 +868,7 @@ static int at86rf230_probe(struct spi_device *spi) spi_set_drvdata(spi, lp); - rc = at86rf230_fill_data(spi); - if (rc) - goto err_fill; + at86rf230_fill_data(spi); rc = gpio_request(lp->rstn, "rstn"); if (rc) @@ -928,7 +943,8 @@ static int at86rf230_probe(struct spi_device *spi) if (rc) goto err_gpio_dir; - rc = request_irq(spi->irq, at86rf230_isr, IRQF_SHARED, + rc = request_irq(spi->irq, at86rf230_isr, + IRQF_SHARED | pdata->irq_type, dev_name(&spi->dev), lp); if (rc) goto err_gpio_dir; @@ -948,7 +964,6 @@ err_gpio_dir: err_slp_tr: gpio_free(lp->rstn); err_rstn: -err_fill: spi_set_drvdata(spi, NULL); mutex_destroy(&lp->bmux); ieee802154_free_device(lp->dev); diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index b2b1afbb320..aa327a8105a 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -26,6 +26,20 @@ struct 
at86rf230_platform_data { int rstn; int slp_tr; int dig2; + + /* Setting the irq_type will configure the driver to request + * the platform irq trigger type according to the given value + * and configure the interrupt polarity of the device to the + * corresponding polarity. + * + * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING, + * IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW + * + * Setting it to 0, the driver does not touch the trigger type + * configuration of the interrupt and sets the interrupt polarity + * of the device to high active (the default value). + */ + int irq_type; }; #endif -- cgit v1.2.3-70-g09d2 From c7995c43facc6e5dea4de63fa9d283a337aabeb1 Mon Sep 17 00:00:00 2001 From: Atzm Watanabe Date: Tue, 16 Apr 2013 02:50:52 +0000 Subject: vxlan: Allow setting destination to unicast address. This patch allows setting VXLAN destination to unicast address. It allows that VXLAN can be used as peer-to-peer tunnel without multicast. v4: generalize struct vxlan_dev, "gaddr" is replaced with vxlan_rdst. "GROUP" attribute is replaced with "REMOTE". they are based by David Stevens's comments. v3: move a new attribute REMOTE into the last of an enum list based by Stephen Hemminger's comments. v2: use a new attribute REMOTE instead of GROUP based by Cong Wang's comments. Signed-off-by: Atzm Watanabe Acked-by: David L Stevens Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 73 ++++++++++++++++++-------------------------- include/uapi/linux/if_link.h | 2 +- 2 files changed, 31 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 97a306c9e65..916a62149a1 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -105,10 +105,8 @@ struct vxlan_fdb { struct vxlan_dev { struct hlist_node hlist; struct net_device *dev; - __u32 vni; /* virtual network id */ - __be32 gaddr; /* multicast group */ + struct vxlan_rdst default_dst; /* default destination */ __be32 saddr; /* source address */ - unsigned int link; /* link to multicast over */ __u16 port_min; /* source port range */ __u16 port_max; __u8 tos; /* TOS override */ @@ -146,7 +144,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) struct vxlan_dev *vxlan; hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { - if (vxlan->vni == id) + if (vxlan->default_dst.remote_vni == id) return vxlan; } @@ -194,7 +192,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (rdst->remote_port && rdst->remote_port != vxlan_port && nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; - if (rdst->remote_vni != vxlan->vni && + if (rdst->remote_vni != vxlan->default_dst.remote_vni && nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) goto nla_put_failure; if (rdst->remote_ifindex && @@ -465,7 +463,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; vni = nla_get_u32(tb[NDA_VNI]); } else - vni = vxlan->vni; + vni = vxlan->default_dst.remote_vni; if (tb[NDA_IFINDEX]) { struct net_device *tdev; @@ -570,7 +568,7 @@ static void vxlan_snoop(struct net_device *dev, err = vxlan_fdb_create(vxlan, src_mac, src_ip, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, - vxlan_port, vxlan->vni, 0); + vxlan_port, vxlan->default_dst.remote_vni, 0); spin_unlock(&vxlan->hash_lock); } } @@ -591,7 +589,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, if (!netif_running(vxlan->dev)) continue; - if (vxlan->gaddr == this->gaddr) + if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip) return true; } @@ 
-605,8 +603,8 @@ static int vxlan_join_group(struct net_device *dev) struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct sock *sk = vn->sock->sk; struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->gaddr, - .imr_ifindex = vxlan->link, + .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, + .imr_ifindex = vxlan->default_dst.remote_ifindex, }; int err; @@ -633,8 +631,8 @@ static int vxlan_leave_group(struct net_device *dev) int err = 0; struct sock *sk = vn->sock->sk; struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->gaddr, - .imr_ifindex = vxlan->link, + .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, + .imr_ifindex = vxlan->default_dst.remote_ifindex, }; /* Only leave group when last vxlan is done. */ @@ -1091,7 +1089,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct ethhdr *eth; bool did_rsc = false; - struct vxlan_rdst group, *rdst0, *rdst; + struct vxlan_rdst *rdst0, *rdst; struct vxlan_fdb *f; int rc1, rc; @@ -1106,14 +1104,9 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) f = vxlan_find_mac(vxlan, eth->h_dest); if (f == NULL) { did_rsc = false; - group.remote_port = vxlan_port; - group.remote_vni = vxlan->vni; - group.remote_ip = vxlan->gaddr; - group.remote_ifindex = vxlan->link; - group.remote_next = NULL; - rdst0 = &group; - - if (group.remote_ip == htonl(INADDR_ANY) && + rdst0 = &vxlan->default_dst; + + if (rdst0->remote_ip == htonl(INADDR_ANY) && (vxlan->flags & VXLAN_F_L2MISS) && !is_multicast_ether_addr(eth->h_dest)) vxlan_fdb_miss(vxlan, eth->h_dest); @@ -1191,7 +1184,7 @@ static int vxlan_open(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; - if (vxlan->gaddr) { + if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { err = vxlan_join_group(dev); if (err) return err; @@ -1225,7 +1218,7 @@ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - if (vxlan->gaddr) + if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) vxlan_leave_group(dev); del_timer_sync(&vxlan->age_timer); @@ -1311,7 +1304,7 @@ static void vxlan_setup(struct net_device *dev) static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, - [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, @@ -1349,14 +1342,6 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) return -ERANGE; } - if (data[IFLA_VXLAN_GROUP]) { - __be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]); - if (!IN_MULTICAST(ntohl(gaddr))) { - pr_debug("group address is not IPv4 multicast\n"); - return -EADDRNOTAVAIL; - } - } - if (data[IFLA_VXLAN_PORT_RANGE]) { const struct ifla_vxlan_port_range *p = nla_data(data[IFLA_VXLAN_PORT_RANGE]); @@ -1387,6 +1372,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; int err; @@ -1398,21 +1384,21 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, pr_info("duplicate VNI %u\n", vni); return -EEXIST; } - vxlan->vni = vni; + dst->remote_vni = vni; - if (data[IFLA_VXLAN_GROUP]) - vxlan->gaddr = 
nla_get_be32(data[IFLA_VXLAN_GROUP]); + if (data[IFLA_VXLAN_REMOTE]) + dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_REMOTE]); if (data[IFLA_VXLAN_LOCAL]) vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); if (data[IFLA_VXLAN_LINK] && - (vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]))) { + (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) { struct net_device *lowerdev - = __dev_get_by_index(net, vxlan->link); + = __dev_get_by_index(net, dst->remote_ifindex); if (!lowerdev) { - pr_info("ifindex %d does not exist\n", vxlan->link); + pr_info("ifindex %d does not exist\n", dst->remote_ifindex); return -ENODEV; } @@ -1464,7 +1450,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (!err) - hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni)); + hlist_add_head_rcu(&vxlan->hlist, vni_head(net, dst->remote_vni)); return err; } @@ -1482,7 +1468,7 @@ static size_t vxlan_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ - nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */ + nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_REMOTE */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ @@ -1501,18 +1487,19 @@ static size_t vxlan_get_size(const struct net_device *dev) static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { const struct vxlan_dev *vxlan = netdev_priv(dev); + const struct vxlan_rdst *dst = &vxlan->default_dst; struct ifla_vxlan_port_range ports = { .low = htons(vxlan->port_min), .high = htons(vxlan->port_max), }; - if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) + if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) goto nla_put_failure; - if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr)) + if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_REMOTE, dst->remote_ip)) goto nla_put_failure; - if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link)) + if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex)) goto nla_put_failure; if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6b35c4211f6..9922704f08a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -296,7 +296,7 @@ enum macvlan_mode { enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, - IFLA_VXLAN_GROUP, + IFLA_VXLAN_REMOTE, IFLA_VXLAN_LINK, IFLA_VXLAN_LOCAL, IFLA_VXLAN_TTL, -- cgit v1.2.3-70-g09d2 From 2ffbe6d333664a089f17b13aa79eefe38f794bb7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Apr 2013 13:38:42 +0200 Subject: mac80211: fix and optimize MCS mask handling Currently the code always copies the configured MCS mask (even if it is set to default), but only uses it if legacy rates were also masked out. Fix this by adding a flag that tracks whether the configured MCS mask is set to default or not. Optimize the code further by storing a pointer to the configured rate mask in txrc instead of using memcpy. 
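To illustrate the idea (a minimal sketch with assumed names, not code from this patch), the new per-band flag only needs to record whether any byte of the configured MCS mask differs from the all-ones default; the transmit path can then hand rate control a pointer instead of copying the whole mask:

	#include <linux/types.h>

	#define MCS_MASK_LEN 10	/* assumed here, stands in for IEEE80211_HT_MCS_MASK_LEN */

	static bool mcs_mask_is_restrictive(const u8 *mask)
	{
		int i;

		for (i = 0; i < MCS_MASK_LEN; i++)
			if (mask[i] != 0xff)	/* any cleared bit masks out an MCS rate */
				return true;
		return false;
	}

Doing this check once at configuration time keeps the per-packet cost down to a flag test and a pointer assignment.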
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++-- net/mac80211/cfg.c | 13 +++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/rate.c | 9 ++++++--- net/mac80211/tx.c | 10 +++++----- 5 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bc5d8184c48..05dbb978850 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4107,7 +4107,7 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); * (deprecated; this will be removed once drivers get updated to use * rate_idx_mask) * @rate_idx_mask: user-requested (legacy) rate mask - * @rate_idx_mcs_mask: user-requested MCS rate mask + * @rate_idx_mcs_mask: user-requested MCS rate mask (NULL if not in use) * @bss: whether this frame is sent out in AP or IBSS mode */ struct ieee80211_tx_rate_control { @@ -4119,7 +4119,7 @@ struct ieee80211_tx_rate_control { bool rts, short_preamble; u8 max_rate_idx; u32 rate_idx_mask; - u8 rate_idx_mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; + u8 *rate_idx_mcs_mask; bool bss; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fdd95bd751a..72ab1c0e3ca 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2417,9 +2417,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, } for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband = wiphy->bands[i]; + int j; + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, sizeof(mask->control[i].mcs)); + + sdata->rc_has_mcs_mask[i] = false; + if (!sband) + continue; + + for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) + if (~sdata->rc_rateidx_mcs_mask[i][j]) { + sdata->rc_has_mcs_mask[i] = true; + break; + } } return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4a65a340a5..21c1720eee0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -739,6 +739,8 @@ struct ieee80211_sub_if_data { /* bitmap of allowed (non-MCS) rate indexes for rate control */ u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + + bool rc_has_mcs_mask[IEEE80211_NUM_BANDS]; u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN]; union { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index dd88381c53b..5d545dd2d05 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -460,9 +460,12 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, * the common case. 
 */ mask = sdata->rc_rateidx_mask[info->band]; - memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], - sizeof(mcs_mask)); - if (mask != (1 << txrc->sband->n_bitrates) - 1) { + if (mask != (1 << txrc->sband->n_bitrates) - 1 || txrc->rate_idx_mcs_mask) { + if (txrc->rate_idx_mcs_mask) + memcpy(mcs_mask, txrc->rate_idx_mcs_mask, sizeof(mcs_mask)); + else + memset(mcs_mask, 0xff, sizeof(mcs_mask)); + if (sta) { /* Filter out rates that the STA does not support */ mask &= sta->sta.supp_rates[info->band]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bb82c873f77..15c1b286e28 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -642,9 +642,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, - tx->sdata->rc_rateidx_mcs_mask[info->band], - sizeof(txrc.rate_idx_mcs_mask)); + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); @@ -2508,8 +2510,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band], - sizeof(txrc.rate_idx_mcs_mask)); txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); -- cgit v1.2.3-70-g09d2 From 991fec091061b901e4fdcc8af4fd25d24a5a7bab Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Apr 2013 13:38:43 +0200 Subject: mac80211: fix CTS protection handling The rates[0] CTS and RTS flags are only set after rate control has been called, so minstrel cannot use them for setting the number of retries. This patch adds two new flags to explicitly indicate RTS/CTS use.
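A minimal sketch (invented names, not the actual minstrel code) of how a rate-control algorithm can derive the retry count from such explicit flags instead of rates[0].flags, which rate control has not filled in yet at this point:

	#include <linux/kernel.h>	/* min()/max() */
	#include <linux/types.h>

	struct tx_prot_flags {
		u8 use_rts:1;		/* RTS/CTS handshake requested */
		u8 use_cts_prot:1;	/* CTS-to-self protection requested */
	};

	static unsigned int pick_retry_count(const struct tx_prot_flags *f,
					     unsigned int plain,
					     unsigned int rtscts,
					     unsigned int cts)
	{
		if (f->use_rts)
			return max(2U, min(rtscts, plain));
		if (f->use_cts_prot)
			return max(2U, min(cts, plain));
		return plain;	/* unprotected frame, use the plain retry budget */
	}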
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- net/mac80211/rc80211_minstrel.c | 6 ++++-- net/mac80211/tx.c | 8 ++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 05dbb978850..4f693a5c54d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -655,7 +655,9 @@ struct ieee80211_tx_info { struct ieee80211_tx_rate rates[ IEEE80211_TX_MAX_RATES]; s8 rts_cts_rate_idx; - /* 3 bytes free */ + u8 use_rts:1; + u8 use_cts_prot:1; + /* 2 bytes free */ }; /* only needed before rate control */ unsigned long jiffies; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 1c36c9b4fa4..eda290fb8bd 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -209,9 +209,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr, { unsigned int retry = mr->adjusted_retry_count; - if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->control.use_rts) retry = max(2U, min(mr->retry_count_rtscts, retry)); - else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) + else if (info->control.use_cts_prot) retry = max(2U, min(mr->retry_count_cts, retry)); return retry; } @@ -460,6 +460,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, } while ((tx_time < mp->segment_size) && (++mr->retry_count < mp->max_retry)); mr->adjusted_retry_count = mr->retry_count; + if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G)) + mr->retry_count_cts = mr->retry_count; } for (i = n; i < sband->n_bitrates; i++) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 15c1b286e28..6ca857f8f42 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -656,6 +656,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.rts = rts = true; } + info->control.use_rts = rts; + info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; + /* * Use short preamble if the BSS can handle it, but not for * management frames unless we know the receiver can handle @@ -766,6 +769,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ if (rc_rate->flags & IEEE80211_TX_RC_MCS) { WARN_ON(rc_rate->idx > 76); + + if (!(rc_rate->flags & IEEE80211_TX_RC_USE_RTS_CTS) && + tx->sdata->vif.bss_conf.use_cts_prot) + rc_rate->flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; continue; } -- cgit v1.2.3-70-g09d2 From fc225c3f5d1b6aa6f99c5c300af4605e4923ce79 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:38 +0200 Subject: Bluetooth: remove unneeded hci_conn_hold/put_device() hci_conn_hold/put_device() is used to control when hci_conn->dev is no longer needed and can be deleted from the system. Lets first look how they are currently used throughout the code (excluding HIDP!). All code that uses hci_conn_hold_device() looks like this: ... hci_conn_hold_device(); hci_conn_add_sysfs(); ... On the other side, hci_conn_put_device() is exclusively used in hci_conn_del(). So, considering that hci_conn_del() must not be called twice (which would fail horribly), we know that hci_conn_put_device() is only called _once_ (which is in hci_conn_del()). On the other hand, hci_conn_add_sysfs() must not be called twice, either (it would call device_add twice, which breaks the device, see drivers/base/core.c). So we know that hci_conn_hold_device() is also called only once (it's only called directly before hci_conn_add_sysfs()). 
So hold and put are known to be called only once. That means we can safely remove them and directly call hci_conn_del_sysfs() in hci_conn_del(). But there is one issue left: HIDP also uses hci_conn_hold/put_device(). However, this case can be ignored and simply removed as it is totally broken. The issue is, the only thing HIDP delays with hci_conn_hold_device() is the removal of the hci_conn->dev from sysfs. But, the hci_conn device has no mechanism to get notified when its own parent (hci_dev) gets removed from sysfs. hci_dev_hold/put() does _not_ control when it is removed but only when the device object is created and destroyed. And hci_dev calls hci_conn_flush_*() when it removes itself from sysfs, which itself causes hci_conn_del() to be called, but it does _not_ cause hci_conn_del_sysfs() to be called, which is wrong. Hence, we fix it to call hci_conn_del_sysfs() in hci_conn_del(). This guarantees that a hci_conn object is removed from sysfs _before_ its parent hci_dev is removed. The changes to HIDP look scary, wrong and broken. However, if you look at the HIDP session management, you will notice they're already broken in the exact _same_ way (ever tried "unplugging" HIDP devices? Breaks _all_ the time). So this patch only makes HIDP look _scary_ and _obviously broken_. It does not break HIDP itself, it already is! See later patches in this series which fix HIDP to use proper session-management. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 4 ---- net/bluetooth/hci_conn.c | 17 +---------------- net/bluetooth/hci_event.c | 4 ---- net/bluetooth/hidp/core.c | 20 +++----------------- 4 files changed, 4 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 78ea9c7c202..5590cc4412c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -345,7 +345,6 @@ struct hci_conn { struct timer_list auto_accept_timer; struct device dev; - atomic_t devref; struct hci_dev *hdev; void *l2cap_data; @@ -601,9 +600,6 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); -void hci_conn_hold_device(struct hci_conn *conn); -void hci_conn_put_device(struct hci_conn *conn); - static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b1a02ce39a2..6b5b8e77cf0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -410,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); - atomic_set(&conn->devref, 0); - hci_conn_init_sysfs(conn); return conn; @@ -460,7 +458,7 @@ int hci_conn_del(struct hci_conn *conn) skb_queue_purge(&conn->data_q); - hci_conn_put_device(conn); + hci_conn_del_sysfs(conn); hci_dev_put(hdev); @@ -847,19 +845,6 @@ void hci_conn_check_pending(struct hci_dev *hdev) hci_dev_unlock(hdev); } -void hci_conn_hold_device(struct hci_conn *conn) -{ - atomic_inc(&conn->devref); -} -EXPORT_SYMBOL(hci_conn_hold_device); - -void hci_conn_put_device(struct hci_conn *conn) -{ - if (atomic_dec_and_test(&conn->devref)) - hci_conn_del_sysfs(conn); -} -EXPORT_SYMBOL(hci_conn_put_device); - int hci_get_conn_list(void __user *arg) { struct hci_conn *c; diff --git a/net/bluetooth/hci_event.c 
b/net/bluetooth/hci_event.c index f6ea3c73426..688c1a9949c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1706,7 +1706,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2987,7 +2986,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); break; @@ -3452,7 +3450,6 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); - hci_conn_hold_device(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -3586,7 +3583,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 4ab82cb3eac..9734136d643 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -73,18 +73,6 @@ static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr) return NULL; } -static void __hidp_link_session(struct hidp_session *session) -{ - list_add(&session->list, &hidp_session_list); -} - -static void __hidp_unlink_session(struct hidp_session *session) -{ - hci_conn_put_device(session->conn); - - list_del(&session->list); -} - static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) { memset(ci, 0, sizeof(*ci)); @@ -760,7 +748,7 @@ static int hidp_session(void *arg) fput(session->ctrl_sock->file); - __hidp_unlink_session(session); + list_del(&session->list); up_write(&hidp_session_sem); @@ -783,8 +771,6 @@ static struct hci_conn *hidp_get_connection(struct hidp_session *session) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); - if (conn) - hci_conn_hold_device(conn); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -1026,7 +1012,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - __hidp_link_session(session); + list_add(&session->list, &hidp_session_list); if (req->rd_size > 0) { err = hidp_setup_hid(session, req); @@ -1106,7 +1092,7 @@ unlink: session->rd_data = NULL; purge: - __hidp_unlink_session(session); + list_del(&session->list); skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); -- cgit v1.2.3-70-g09d2 From 8d12356f33f819ec0d064e233f7ca8e59eaa38ef Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:39 +0200 Subject: Bluetooth: introduce hci_conn ref-counting We currently do not allow using hci_conn from outside of HCI-core. However, several other users could make great use of it. This includes HIDP, rfcomm and all other sub-protocols that rely on an active connection. Hence, we now introduce hci_conn ref-counting. We currently never call get_device(). put_device() is exclusively used in hci_conn_del_sysfs(). Hence, we currently never have a greater device-refcnt than 1. Therefore, it is safe to move the put_device() call from hci_conn_del_sysfs() to hci_conn_del() (it's the only caller). 
In fact, this even fixes a "use-after-free" bug as we access hci_conn after calling hci_conn_del_sysfs() in hci_conn_del(). From now on we can add references to hci_conn objects in other layers (like l2cap_sock, HIDP, rfcomm, ...) and grab a reference via hci_conn_get(). This does _not_ guarantee, that the connection is still alive. But, this isn't what we want. We can simply lock the hci_conn device and use "device_is_registered(hci_conn->dev)" to test that. However, this is hardly necessary as outside users should never rely on the HCI connection to be alive, anyway. Instead, they should solely rely on the device-object to be available. But if sub-devices want the hci_conn object as sysfs parent, they need to be notified when the connection drops. This will be introduced in later patches with l2cap_users. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 31 +++++++++++++++++++++++++++++++ net/bluetooth/hci_conn.c | 3 +-- net/bluetooth/hci_sysfs.c | 1 - 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5590cc4412c..d324b11a0c8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -600,6 +600,37 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); +/* + * hci_conn_get() and hci_conn_put() are used to control the life-time of an + * "hci_conn" object. They do not guarantee that the hci_conn object is running, + * working or anything else. They just guarantee that the object is available + * and can be dereferenced. So you can use its locks, local variables and any + * other constant data. + * Before accessing runtime data, you _must_ lock the object and then check that + * it is still running. As soon as you release the locks, the connection might + * get dropped, though. + * + * On the other hand, hci_conn_hold() and hci_conn_drop() are used to control + * how long the underlying connection is held. So every channel that runs on the + * hci_conn object calls this to prevent the connection from disappearing. As + * long as you hold a device, you must also guarantee that you have a valid + * reference to the device via hci_conn_get() (or the initial reference from + * hci_conn_add()). + * The hold()/drop() ref-count is known to drop below 0 sometimes, which doesn't + * break because nobody cares for that. But this means, we cannot use + * _get()/_drop() in it, but require the caller to have a valid ref (FIXME). 
+ */ + +static inline void hci_conn_get(struct hci_conn *conn) +{ + get_device(&conn->dev); +} + +static inline void hci_conn_put(struct hci_conn *conn) +{ + put_device(&conn->dev); +} + static inline void hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6b5b8e77cf0..6c7f3637972 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -462,8 +462,7 @@ int hci_conn_del(struct hci_conn *conn) hci_dev_put(hdev); - if (conn->handle == 0) - kfree(conn); + hci_conn_put(conn); return 0; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ff38561385d..6fe15c82284 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -146,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn) } device_del(&conn->dev); - put_device(&conn->dev); hci_dev_put(hdev); } -- cgit v1.2.3-70-g09d2 From f53c20e93612f708ed3b378ec9735b779dcd7d59 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:42 +0200 Subject: Bluetooth: allow constant arguments for bacmp()/bacpy() There is no reason to require the source arguments to be writeable so fix this to allow constant source addresses. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/bluetooth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 591fee7d006..6912ef9a188 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -193,11 +193,11 @@ static inline bool bdaddr_type_is_le(__u8 type) #define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff} }) /* Copy, swap, convert BD Address */ -static inline int bacmp(bdaddr_t *ba1, bdaddr_t *ba2) +static inline int bacmp(const bdaddr_t *ba1, const bdaddr_t *ba2) { return memcmp(ba1, ba2, sizeof(bdaddr_t)); } -static inline void bacpy(bdaddr_t *dst, bdaddr_t *src) +static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) { memcpy(dst, src, sizeof(bdaddr_t)); } -- cgit v1.2.3-70-g09d2 From 9c903e373c11f62d62bce1209f662ca92589a075 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:44 +0200 Subject: Bluetooth: l2cap: introduce l2cap_conn ref-counting If we want to use l2cap_conn outside of l2cap_core.c, we need refcounting for these objects. Otherwise, we cannot synchronize l2cap locks with outside locks and end up with deadlocks. Hence, introduce ref-counting for l2cap_conn objects. This doesn't affect l2cap internals at all, as they use a direct synchronization. We also keep a reference to the parent hci_conn for locking purposes as l2cap_conn depends on this. This doesn't affect the connection itself but only the lifetime of the (dead) object. 
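The pattern is the standard kref one; a generic sketch (using an invented "foo_conn" object rather than the actual l2cap code) looks like this:

	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/slab.h>

	struct foo_conn {
		struct kref ref;	/* embedded reference count */
		/* ... connection state ... */
	};

	static void foo_conn_free(struct kref *kref)
	{
		struct foo_conn *conn = container_of(kref, struct foo_conn, ref);

		kfree(conn);
	}

	static void foo_conn_get(struct foo_conn *conn)
	{
		kref_get(&conn->ref);
	}

	static void foo_conn_put(struct foo_conn *conn)
	{
		kref_put(&conn->ref, foo_conn_free);	/* frees the object at refcount zero */
	}

A user takes foo_conn_get() before storing the pointer and foo_conn_put() when done; in between the object stays safe to dereference, even though the underlying connection may already be gone.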
Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ net/bluetooth/l2cap_core.c | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 278830ef92c..7b4cc5b9853 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -583,6 +583,7 @@ struct l2cap_conn { struct list_head chan_l; struct mutex chan_lock; + struct kref ref; }; #define L2CAP_INFO_CL_MTU_REQ_SENT 0x01 @@ -813,4 +814,7 @@ void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, u8 status); void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); +void l2cap_conn_get(struct l2cap_conn *conn); +void l2cap_conn_put(struct l2cap_conn *conn); + #endif /* __L2CAP_H */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e09b89be1c4..be9ad89339c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1486,7 +1486,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) } hcon->l2cap_data = NULL; - kfree(conn); + conn->hchan = NULL; + l2cap_conn_put(conn); } static void security_timeout(struct work_struct *work) @@ -1520,8 +1521,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) return NULL; } + kref_init(&conn->ref); hcon->l2cap_data = conn; conn->hcon = hcon; + hci_conn_get(conn->hcon); conn->hchan = hchan; BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); @@ -1558,6 +1561,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) return conn; } +static void l2cap_conn_free(struct kref *ref) +{ + struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref); + + hci_conn_put(conn->hcon); + kfree(conn); +} + +void l2cap_conn_get(struct l2cap_conn *conn) +{ + kref_get(&conn->ref); +} +EXPORT_SYMBOL(l2cap_conn_get); + +void l2cap_conn_put(struct l2cap_conn *conn) +{ + kref_put(&conn->ref, l2cap_conn_free); +} +EXPORT_SYMBOL(l2cap_conn_put); + /* ---- Socket interface ---- */ /* Find socket with psm and source / destination bdaddr. -- cgit v1.2.3-70-g09d2 From 2c8e1411e93391c5a78f55b09697a997474a4707 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sat, 6 Apr 2013 20:28:45 +0200 Subject: Bluetooth: l2cap: add l2cap_user sub-modules Several sub-modules like HIDP, rfcomm, ... need to track l2cap connections. The l2cap_conn->hcon->dev object is used as parent for sysfs devices so the sub-modules need to be notified when the hci_conn object is removed from sysfs. As submodules normally use the l2cap layer, the l2cap_user objects are registered there instead of on the underlying hci_conn object. This avoids any direct dependency on the HCI layer and lets the l2cap core handle any specifics. This patch introduces l2cap_user objects which contain a "probe" and "remove" callback. You can register them on any l2cap_conn object and if it is active, the "probe" callback will get called. Otherwise, an error is returned. The l2cap_conn object will call your "remove" callback directly before it is removed from user-space. This allows you to remove your submodules _before_ the parent l2cap_conn and hci_conn object is removed. At any time you can asynchronously unregister your l2cap_user object if your submodule vanishes before the l2cap_conn object does. There is no way around l2cap_user. If we want wire-protocols in the kernel, we always want the hci_conn object as parent in the sysfs tree. 
We cannot use a channel here since we might need multiple channels for a single protocol. But the problem is, we _must_ get notified when an l2cap_conn object is removed. We cannot use reference-counting for object-removal! This is not how it works. If a hardware is removed, we should immediately remove the object from sysfs. Any other behavior would be inconsistent with the rest of the system. Also note that device_del() might sleep, but it doesn't wait for user-space or block very long. It only _unlinks_ the object from sysfs and the whole device-tree. Everything else is handled by ref-counts! This is exactly what the other sub-modules must do: unlink their devices when the "remove" l2cap_user callback is called. They should not do any cleanup or synchronous shutdowns. Signed-off-by: David Herrmann Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 10 +++++ net/bluetooth/l2cap_core.c | 86 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7b4cc5b9853..fb94cf13c77 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -584,6 +584,13 @@ struct l2cap_conn { struct list_head chan_l; struct mutex chan_lock; struct kref ref; + struct list_head users; +}; + +struct l2cap_user { + struct list_head list; + int (*probe) (struct l2cap_conn *conn, struct l2cap_user *user); + void (*remove) (struct l2cap_conn *conn, struct l2cap_user *user); }; #define L2CAP_INFO_CL_MTU_REQ_SENT 0x01 @@ -817,4 +824,7 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result); void l2cap_conn_get(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); +int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); +void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user); + #endif /* __L2CAP_H */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index be9ad89339c..eae1d9f90b6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work) l2cap_conn_start(conn); } +/* + * l2cap_user + * External modules can register l2cap_user objects on l2cap_conn. The ->probe + * callback is called during registration. The ->remove callback is called + * during unregistration. + * An l2cap_user object can either be explicitly unregistered or when the + * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon, + * l2cap->hchan, .. are valid as long as the remove callback hasn't been called. + * External modules must own a reference to the l2cap_conn object if they intend + * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at + * any time if they don't. + */ + +int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + int ret; + + /* We need to check whether l2cap_conn is registered. If it is not, we + * must not register the l2cap_user. l2cap_conn_del() is unregisters + * l2cap_conn objects, but doesn't provide its own locking. Instead, it + * relies on the parent hci_conn object to be locked. This itself relies + * on the hci_dev object to be locked. So we must lock the hci device + * here, too. 
*/ + + hci_dev_lock(hdev); + + if (user->list.next || user->list.prev) { + ret = -EINVAL; + goto out_unlock; + } + + /* conn->hchan is NULL after l2cap_conn_del() was called */ + if (!conn->hchan) { + ret = -ENODEV; + goto out_unlock; + } + + ret = user->probe(conn, user); + if (ret) + goto out_unlock; + + list_add(&user->list, &conn->users); + ret = 0; + +out_unlock: + hci_dev_unlock(hdev); + return ret; +} +EXPORT_SYMBOL(l2cap_register_user); + +void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) +{ + struct hci_dev *hdev = conn->hcon->hdev; + + hci_dev_lock(hdev); + + if (!user->list.next || !user->list.prev) + goto out_unlock; + + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + +out_unlock: + hci_dev_unlock(hdev); +} +EXPORT_SYMBOL(l2cap_unregister_user); + +static void l2cap_unregister_all_users(struct l2cap_conn *conn) +{ + struct l2cap_user *user; + + while (!list_empty(&conn->users)) { + user = list_first_entry(&conn->users, struct l2cap_user, list); + list_del(&user->list); + user->list.next = NULL; + user->list.prev = NULL; + user->remove(conn, user); + } +} + static void l2cap_conn_del(struct hci_conn *hcon, int err) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) kfree_skb(conn->rx_skb); + l2cap_unregister_all_users(conn); + mutex_lock(&conn->chan_lock); /* Kill channels */ @@ -1550,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) mutex_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); + INIT_LIST_HEAD(&conn->users); if (hcon->type == LE_LINK) INIT_DELAYED_WORK(&conn->security_timer, security_timeout); -- cgit v1.2.3-70-g09d2 From 542c2d832087aa78566be49aa4284779a0a687b3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:10 +0000 Subject: net: sctp: sctp_ssnmap: remove 'malloced' element from struct sctp_ssnmap_init() can only be called from sctp_ssnmap_new() where malloced is always set to 1. Thus, when we call sctp_ssnmap_free() the test for map->malloced evaluates always to true. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/ssnmap.c | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e12aa77abc5..3c1bb8dd1e2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -399,7 +399,6 @@ struct sctp_stream { struct sctp_ssnmap { struct sctp_stream in; struct sctp_stream out; - int malloced; }; struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 825ea94415b..da860352380 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, if (!sctp_ssnmap_init(retval, in, out)) goto fail_map; - retval->malloced = 1; SCTP_DBG_OBJCNT_INC(ssnmap); return retval; @@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map) /* Dispose of a ssnmap. 
*/ void sctp_ssnmap_free(struct sctp_ssnmap *map) { - if (map && map->malloced) { - int size; - - size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= KMALLOC_MAX_SIZE) - kfree(map); - else - free_pages((unsigned long)map, get_order(size)); - SCTP_DBG_OBJCNT_DEC(ssnmap); - } + int size; + + if (unlikely(!map)) + return; + + size = sctp_ssnmap_size(map->in.len, map->out.len); + if (size <= KMALLOC_MAX_SIZE) + kfree(map); + else + free_pages((unsigned long)map, get_order(size)); + + SCTP_DBG_OBJCNT_DEC(ssnmap); } -- cgit v1.2.3-70-g09d2 From ee16371e6c737684215ee10b4b9756b610d81272 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:11 +0000 Subject: net: sctp: sctp_inq: remove dead code sctp_inq is never kmalloced, since it's integrated into sctp_ep_common and only initialized from eps and assocs. Therefore, remove the dead code from there. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/inqueue.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3c1bb8dd1e2..125a19c7353 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -991,8 +991,6 @@ struct sctp_inq { * messages. */ struct work_struct immediate; - - int malloced; /* Is this structure kfree()able? */ }; void sctp_inq_init(struct sctp_inq *); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d5ad280de3..3221d073448 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue) /* Create a task for delivering data. */ INIT_WORK(&queue->immediate, NULL); - - queue->malloced = 0; } /* Release the memory associated with an SCTP inqueue. */ @@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue) sctp_chunk_free(queue->in_progress); queue->in_progress = NULL; } - - if (queue->malloced) { - /* Dump the master memory segment. */ - kfree(queue); - } } /* Put a new packet in an SCTP inqueue. -- cgit v1.2.3-70-g09d2 From 165a4c31278c980862b2c2ddec408cf30341f3ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:12 +0000 Subject: net: sctp: sctp_outq: remove 'malloced' from its struct sctp_outq is embedded into sctp_association, and thus never kmalloced in any way. Also, malloced is always 0, thus kfree() is never called. Therefore, remove that dead piece of code. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 --- net/sctp/outqueue.c | 6 ------ 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 125a19c7353..73fd5de4d4c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1059,9 +1059,6 @@ struct sctp_outq { /* Is this structure empty? */ char empty; - - /* Are we kfree()able? */ - char malloced; }; void sctp_outq_init(struct sctp_association *, struct sctp_outq *); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 01dca753db1..d4c137e1ab8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; - - q->malloced = 0; q->out_qlen = 0; } @@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. 
*/ __sctp_outq_teardown(q); - - /* If we were kmalloc()'d, free the memory. */ - if (q->malloced) - kfree(q); } /* Put a new chunk in an sctp_outq. */ -- cgit v1.2.3-70-g09d2 From 8fa5df6d210a09241876b74d156c57d833dd057b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:15 +0000 Subject: net: sctp: sctp_transport: remove unused variable sctp_transport's member 'malloced' is set to 1, never evaluated and the structure is kfreed anyway. So just remove it. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +---- net/sctp/transport.c | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 73fd5de4d4c..d581af00120 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -779,10 +779,7 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1, - - /* Is this structure kfree()able? */ - malloced:1; + pmtu_pending:1; /* Has this transport moved the ctsn since we last sacked */ __u32 sack_generation; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index fafd2a461ba..098f1d5f769 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net, if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; - transport->malloced = 1; SCTP_DBG_OBJCNT_INC(transport); return transport; -- cgit v1.2.3-70-g09d2 From 50181c07cbde370986c4925b830ca291a2fc31ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:16 +0000 Subject: net: sctp: sctp_bind_addr: remove dead code The sctp_bind_addr structure has a 'malloced' member that is always set to 0, thus in sctp_bind_addr_free() the kfree() part can never be called. This part is embedded into sctp_ep_common anyway and never alloced. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- net/sctp/bind_addr.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d581af00120..64d469845f2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1093,8 +1093,6 @@ struct sctp_bind_addr { * peer(s) in INIT and INIT ACK chunks. */ struct list_head address_list; - - int malloced; /* Are we kfree()able? */ }; void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index d886b3bf84f..41145fe3181 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, */ void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port) { - bp->malloced = 0; - INIT_LIST_HEAD(&bp->address_list); bp->port = port; } @@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) { /* Empty the bind address list. */ sctp_bind_addr_clean(bp); - - if (bp->malloced) { - kfree(bp); - SCTP_DBG_OBJCNT_DEC(bind_addr); - } } /* Add an address to the bind address list in the SCTP_bind_addr structure. 
*/ -- cgit v1.2.3-70-g09d2 From c1db7a26ac3f7223a38eaeb46a77d0cf9e6a0d8f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 11:07:17 +0000 Subject: net: sctp: sctp_ulpq: remove 'malloced' struct member The structure sctp_ulpq is embedded into sctp_association and never separately allocated, also ulpq->malloced is always 0, so that kfree() is never called. Therefore, remove this code. Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/ulpqueue.h | 1 - net/sctp/ulpqueue.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index ff1b8ba73ab..00e50ba3f24 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -49,7 +49,6 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ struct sctp_ulpq { - char malloced; char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0fd5b3d2df0..04e3d470f87 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - ulpq->malloced = 0; return ulpq; } @@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); - if (ulpq->malloced) - kfree(ulpq); } /* Process an incoming DATA chunk. */ -- cgit v1.2.3-70-g09d2 From fa5513be2b709c8ce6ff0b11d0715760a9a70ffd Mon Sep 17 00:00:00 2001 From: Frédéric Dalleau Date: Tue, 16 Apr 2013 17:28:58 +0200 Subject: Bluetooth: Move and rename hci_conn_accept MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since this function is only used by sco, move it from hci_event.c to sco.c and rename to sco_conn_defer_accept. Make it static. 
Signed-off-by: Frédéric Dalleau Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_event.c | 36 ------------------------------------ net/bluetooth/sco.c | 38 +++++++++++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d324b11a0c8..74f77b7ef13 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -583,7 +583,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst); int hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); -void hci_conn_accept(struct hci_conn *conn, int mask); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6fa909075ce..e4d4d231fd8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1752,42 +1752,6 @@ unlock: hci_conn_check_pending(hdev); } -void hci_conn_accept(struct hci_conn *conn, int mask) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->state = BT_CONFIG; - - if (!lmp_esco_capable(hdev)) { - struct hci_cp_accept_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ - - hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); - } else /* lmp_esco_capable(hdev)) */ { - struct hci_cp_accept_sync_conn_req cp; - - bacpy(&cp.bdaddr, &conn->dst); - cp.pkt_type = cpu_to_le16(conn->pkt_type); - - cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); - cp.max_latency = __constant_cpu_to_le16(0xffff); - cp.content_format = cpu_to_le16(hdev->voice_setting); - cp.retrans_effort = 0xff; - - hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, - sizeof(cp), &cp); - } -} - static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_request *ev = (void *) skb->data; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d8836802526..9e62102443d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -652,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } +static void sco_conn_defer_accept(struct hci_conn *conn, int mask) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->state = BT_CONFIG; + + if (!lmp_esco_capable(hdev)) { + struct hci_cp_accept_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ + else + cp.role = 0x01; /* Remain slave */ + + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); + } else { + struct hci_cp_accept_sync_conn_req cp; + + bacpy(&cp.bdaddr, &conn->dst); + cp.pkt_type = cpu_to_le16(conn->pkt_type); + + cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); + cp.max_latency = __constant_cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, + sizeof(cp), &cp); + } +} + static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { 
@@ -662,7 +698,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hci_conn_accept(pi->conn->hcon, 0); + sco_conn_defer_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; release_sock(sk); -- cgit v1.2.3-70-g09d2 From cad718ed2f6fd204b2c5cac6b611fc3fcde7b183 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 17 Apr 2013 15:00:51 +0300 Subject: Bluetooth: Track feature pages in a single table The local and remote features are organized by page number. Page 0 are the LMP features, page 1 the host features, and any pages beyond 1 features that future core specification versions may define. So far we've only had the first two pages and two separate variables has been convenient enough, however with the introduction of Core Specification Addendum 4 there are features defined on page 2. Instead of requiring the addition of a new variable each time a new page number is defined, this patch refactors the code to use a single table for the features. The patch needs to update both the hci_dev and hci_conn structures since there are macros that depend on the features being represented in the same way in both of them. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 49 ++++++++++++++++----------------- net/bluetooth/hci_event.c | 58 +++++++++++++++++++++------------------- net/bluetooth/hci_sysfs.c | 16 +++++------ 3 files changed, 63 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 74f77b7ef13..26822967e29 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -134,6 +134,8 @@ struct amp_assoc { __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; +#define HCI_MAX_PAGES 2 + #define NUM_REASSEMBLY 4 struct hci_dev { struct list_head list; @@ -151,8 +153,7 @@ struct hci_dev { __u8 dev_class[3]; __u8 major_class; __u8 minor_class; - __u8 features[8]; - __u8 host_features[8]; + __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; __u8 le_states[8]; @@ -313,7 +314,7 @@ struct hci_conn { bool out; __u8 attempt; __u8 dev_class[3]; - __u8 features[8]; + __u8 features[HCI_MAX_PAGES][8]; __u16 interval; __u16 pkt_type; __u16 link_policy; @@ -786,29 +787,29 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) /* ----- LMP capabilities ----- */ -#define lmp_encrypt_capable(dev) ((dev)->features[0] & LMP_ENCRYPT) -#define lmp_rswitch_capable(dev) ((dev)->features[0] & LMP_RSWITCH) -#define lmp_hold_capable(dev) ((dev)->features[0] & LMP_HOLD) -#define lmp_sniff_capable(dev) ((dev)->features[0] & LMP_SNIFF) -#define lmp_park_capable(dev) ((dev)->features[1] & LMP_PARK) -#define lmp_inq_rssi_capable(dev) ((dev)->features[3] & LMP_RSSI_INQ) -#define lmp_esco_capable(dev) ((dev)->features[3] & LMP_ESCO) -#define lmp_bredr_capable(dev) (!((dev)->features[4] & LMP_NO_BREDR)) -#define lmp_le_capable(dev) ((dev)->features[4] & LMP_LE) -#define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR) -#define lmp_pause_enc_capable(dev) ((dev)->features[5] & LMP_PAUSE_ENC) -#define lmp_ext_inq_capable(dev) ((dev)->features[6] & LMP_EXT_INQ) -#define lmp_le_br_capable(dev) !!((dev)->features[6] & LMP_SIMUL_LE_BR) -#define lmp_ssp_capable(dev) ((dev)->features[6] & LMP_SIMPLE_PAIR) -#define lmp_no_flush_capable(dev) ((dev)->features[6] & LMP_NO_FLUSH) -#define 
lmp_lsto_capable(dev) ((dev)->features[7] & LMP_LSTO) -#define lmp_inq_tx_pwr_capable(dev) ((dev)->features[7] & LMP_INQ_TX_PWR) -#define lmp_ext_feat_capable(dev) ((dev)->features[7] & LMP_EXTFEATURES) +#define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) +#define lmp_rswitch_capable(dev) ((dev)->features[0][0] & LMP_RSWITCH) +#define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD) +#define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF) +#define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK) +#define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ) +#define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO) +#define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR)) +#define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) +#define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) +#define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) +#define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) +#define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) +#define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) +#define lmp_no_flush_capable(dev) ((dev)->features[0][6] & LMP_NO_FLUSH) +#define lmp_lsto_capable(dev) ((dev)->features[0][7] & LMP_LSTO) +#define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR) +#define lmp_ext_feat_capable(dev) ((dev)->features[0][7] & LMP_EXTFEATURES) /* ----- Extended LMP capabilities ----- */ -#define lmp_host_ssp_capable(dev) ((dev)->host_features[0] & LMP_HOST_SSP) -#define lmp_host_le_capable(dev) !!((dev)->host_features[0] & LMP_HOST_LE) -#define lmp_host_le_br_capable(dev) !!((dev)->host_features[0] & LMP_HOST_LE_BREDR) +#define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) +#define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) +#define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) /* returns true if at least one AMP active */ static inline bool hci_amp_capable(void) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e4d4d231fd8..8adc3915ece 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -433,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) if (!status) { if (sent->mode) - hdev->host_features[0] |= LMP_HOST_SSP; + hdev->features[1][0] |= LMP_HOST_SSP; else - hdev->host_features[0] &= ~LMP_HOST_SSP; + hdev->features[1][0] &= ~LMP_HOST_SSP; } if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -493,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, /* Adjust default settings according to features * supported by device. 
*/ - if (hdev->features[0] & LMP_3SLOT) + if (hdev->features[0][0] & LMP_3SLOT) hdev->pkt_type |= (HCI_DM3 | HCI_DH3); - if (hdev->features[0] & LMP_5SLOT) + if (hdev->features[0][0] & LMP_5SLOT) hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - if (hdev->features[1] & LMP_HV2) { + if (hdev->features[0][1] & LMP_HV2) { hdev->pkt_type |= (HCI_HV2); hdev->esco_type |= (ESCO_HV2); } - if (hdev->features[1] & LMP_HV3) { + if (hdev->features[0][1] & LMP_HV3) { hdev->pkt_type |= (HCI_HV3); hdev->esco_type |= (ESCO_HV3); } @@ -512,26 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); - if (hdev->features[4] & LMP_EV4) + if (hdev->features[0][4] & LMP_EV4) hdev->esco_type |= (ESCO_EV4); - if (hdev->features[4] & LMP_EV5) + if (hdev->features[0][4] & LMP_EV5) hdev->esco_type |= (ESCO_EV5); - if (hdev->features[5] & LMP_EDR_ESCO_2M) + if (hdev->features[0][5] & LMP_EDR_ESCO_2M) hdev->esco_type |= (ESCO_2EV3); - if (hdev->features[5] & LMP_EDR_ESCO_3M) + if (hdev->features[0][5] & LMP_EDR_ESCO_3M) hdev->esco_type |= (ESCO_3EV3); - if (hdev->features[5] & LMP_EDR_3S_ESCO) + if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -544,14 +544,8 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; - switch (rp->page) { - case 0: - memcpy(hdev->features, rp->features, 8); - break; - case 1: - memcpy(hdev->host_features, rp->features, 8); - break; - } + if (rp->page < HCI_MAX_PAGES) + memcpy(hdev->features[rp->page], rp->features, 8); } static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -1046,14 +1040,14 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (!status) { if (sent->le) - hdev->host_features[0] |= LMP_HOST_LE; + hdev->features[1][0] |= LMP_HOST_LE; else - hdev->host_features[0] &= ~LMP_HOST_LE; + hdev->features[1][0] &= ~LMP_HOST_LE; if (sent->simul) - hdev->host_features[0] |= LMP_HOST_LE_BREDR; + hdev->features[1][0] |= LMP_HOST_LE_BREDR; else - hdev->host_features[0] &= ~LMP_HOST_LE_BREDR; + hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; } if (test_bit(HCI_MGMT, &hdev->dev_flags) && @@ -2076,7 +2070,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, goto unlock; if (!ev->status) - memcpy(conn->features, ev->features, 8); + memcpy(conn->features[0], ev->features, 8); if (conn->state != BT_CONFIG) goto unlock; @@ -2888,6 +2882,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, if (!conn) goto unlock; + if (ev->page < HCI_MAX_PAGES) + memcpy(conn->features[ev->page], ev->features, 8); + if (!ev->status && ev->page == 0x01) { struct inquiry_entry *ie; @@ -3346,11 +3343,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev, { struct hci_ev_remote_host_features *ev = (void *) skb->data; struct inquiry_entry *ie; + struct hci_conn *conn; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + memcpy(conn->features[1], ev->features, 8); + ie = 
hci_inquiry_cache_lookup(hdev, &ev->bdaddr); if (ie) ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 6fe15c82284..7ad6ecf36f2 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -48,10 +48,10 @@ static ssize_t show_link_features(struct device *dev, struct hci_conn *conn = to_hci_conn(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - conn->features[0], conn->features[1], - conn->features[2], conn->features[3], - conn->features[4], conn->features[5], - conn->features[6], conn->features[7]); + conn->features[0][0], conn->features[0][1], + conn->features[0][2], conn->features[0][3], + conn->features[0][4], conn->features[0][5], + conn->features[0][6], conn->features[0][7]); } #define LINK_ATTR(_name, _mode, _show, _store) \ @@ -233,10 +233,10 @@ static ssize_t show_features(struct device *dev, struct hci_dev *hdev = to_hci_dev(dev); return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", - hdev->features[0], hdev->features[1], - hdev->features[2], hdev->features[3], - hdev->features[4], hdev->features[5], - hdev->features[6], hdev->features[7]); + hdev->features[0][0], hdev->features[0][1], + hdev->features[0][2], hdev->features[0][3], + hdev->features[0][4], hdev->features[0][5], + hdev->features[0][6], hdev->features[0][7]); } static ssize_t show_manufacturer(struct device *dev, -- cgit v1.2.3-70-g09d2 From d2c5d77fff6ac0f43fc36f4fde020f726f773c1d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 17 Apr 2013 15:00:52 +0300 Subject: Bluetooth: Add reading of all local feature pages With the introduction of CSA4 there is now also a features page number 2 available. This patch increments the maximum supported page number to 2 and adds code for reading all available pages (as long as we have support for them - indicated by HCI_MAX_PAGES). 
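As an illustration of how a consumer would use the extra page once it has been read, a page 2 capability check could follow the same pattern as the existing lmp_*_capable() macros. This is only a hedged sketch: CSA4_FEATURE_BIT and lmp_csa4_capable() are placeholders invented for the example, not definitions added by this patch.

	/* Hypothetical page 2 accessor, following the lmp_*_capable() style. */
	#define CSA4_FEATURE_BIT	0x01	/* placeholder bit value */
	#define lmp_csa4_capable(dev)	((dev)->features[2][0] & CSA4_FEATURE_BIT)

	static bool local_supports_csa4(struct hci_dev *hdev)
	{
		/* Only trust page 2 if the controller actually reported it. */
		if (hdev->max_page < 2)
			return false;

		return lmp_csa4_capable(hdev);
	}
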
Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_core.c | 10 ++++++++++ net/bluetooth/hci_event.c | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 26822967e29..80d718a9b31 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -134,7 +134,7 @@ struct amp_assoc { __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; -#define HCI_MAX_PAGES 2 +#define HCI_MAX_PAGES 3 #define NUM_REASSEMBLY 4 struct hci_dev { @@ -153,6 +153,7 @@ struct hci_dev { __u8 dev_class[3]; __u8 major_class; __u8 minor_class; + __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_white_list_size; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9570358adb7..e246d3782ac 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -589,6 +589,7 @@ static void hci_set_le_support(struct hci_request *req) static void hci_init3_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + u8 p; if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); @@ -597,6 +598,15 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_set_le_support(req); hci_update_ad(req); } + + /* Read features beyond page 1 if available */ + for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + struct hci_cp_read_local_ext_features cp; + + cp.page = p; + hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, + sizeof(cp), &cp); + } } static int __hci_init(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8adc3915ece..3b2c0e07b25 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -544,6 +544,8 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; + hdev->max_page = rp->max_page; + if (rp->page < HCI_MAX_PAGES) memcpy(hdev->features[rp->page], rp->features, 8); } -- cgit v1.2.3-70-g09d2 From 5df480b56e427d83830576862463226c8fcc95d7 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:00 -0300 Subject: Bluetooth: Add LE scan type macros This patch adds macros for active and passive LE scan type values. The LE_SCAN_PASSIVE was also defined since it will be used in future by LE connection routine and GAP Observer Role support. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 3 +++ net/bluetooth/mgmt.c | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b3308927a0a..3f4266b3c3b 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -984,6 +984,9 @@ struct hci_cp_le_set_adv_data { #define HCI_OP_LE_SET_ADV_ENABLE 0x200a +#define LE_SCAN_PASSIVE 0x00 +#define LE_SCAN_ACTIVE 0x01 + #define HCI_OP_LE_SET_SCAN_PARAM 0x200b struct hci_cp_le_set_scan_param { __u8 type; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cd2332f6cec..4c830c62ef7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -106,7 +106,6 @@ static const u16 mgmt_events[] = { * These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. 
*/ -#define LE_SCAN_TYPE 0x01 #define LE_SCAN_WIN 0x12 #define LE_SCAN_INT 0x12 #define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) @@ -2703,7 +2702,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; @@ -2715,8 +2714,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); + err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); break; default: -- cgit v1.2.3-70-g09d2 From 525e296a28561659d85a63befb694f36e6ec3429 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:01 -0300 Subject: Bluetooth: Add macros for filter duplicates values This patch adds macros for filter_duplicates parameter values from HCI LE Set Scan Enable command. It also fixes le_scan_enable_req function so it uses the LE_SCAN_FILTER_DUP_ENABLE macro instead of a magic number. The LE_SCAN_FILTER_DUP_DISABLE was also defined since it will be required to properly support the GAP Observer Role. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_core.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3f4266b3c3b..84c37abc48c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -998,6 +998,8 @@ struct hci_cp_le_set_scan_param { #define LE_SCANNING_DISABLED 0x00 #define LE_SCANNING_ENABLED 0x01 +#define LE_SCAN_FILTER_DUP_DISABLE 0x00 +#define LE_SCAN_FILTER_DUP_ENABLE 0x01 #define HCI_OP_LE_SET_SCAN_ENABLE 0x200c struct hci_cp_le_set_scan_enable { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 00dcb74954d..d0ae237ff81 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1990,7 +1990,7 @@ static void le_scan_enable_req(struct hci_request *req, unsigned long opt) memset(&cp, 0, sizeof(cp)); cp.enable = 1; - cp.filter_dup = 1; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } -- cgit v1.2.3-70-g09d2 From 76a388beaf92cc75b829d4a0b7d69afaaeaa4b0a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 4 Apr 2013 20:21:02 -0300 Subject: Bluetooth: Rename LE_SCANNING_* macros This patch renames LE_SCANNING_ENABLED and LE_SCANNING_DISABLED macros to LE_SCAN_ENABLE and LE_SCAN_DISABLE in order to keep the same prefix others LE scan macros have. It also fixes le_scan_enable_req function so it uses the LE_SCAN_ ENABLE macro instead of a magic number. 
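To show how the renamed defines fit together with the filter-duplicates macros, enabling or disabling scanning ends up looking roughly like the sketch below. It mirrors le_scan_enable_req() as touched by this series, but is written here purely as an illustrative example rather than as part of the patch.

	static void le_scan_toggle(struct hci_request *req, bool enable)
	{
		struct hci_cp_le_set_scan_enable cp;

		memset(&cp, 0, sizeof(cp));
		cp.enable = enable ? LE_SCAN_ENABLE : LE_SCAN_DISABLE;
		cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;

		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
	}
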
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 4 ++-- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 84c37abc48c..e0512aaef4b 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -996,8 +996,8 @@ struct hci_cp_le_set_scan_param { __u8 filter_policy; } __packed; -#define LE_SCANNING_DISABLED 0x00 -#define LE_SCANNING_ENABLED 0x01 +#define LE_SCAN_DISABLE 0x00 +#define LE_SCAN_ENABLE 0x01 #define LE_SCAN_FILTER_DUP_DISABLE 0x00 #define LE_SCAN_FILTER_DUP_ENABLE 0x01 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d0ae237ff81..ce82265f561 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1989,7 +1989,7 @@ static void le_scan_enable_req(struct hci_request *req, unsigned long opt) struct hci_cp_le_set_scan_enable cp; memset(&cp, 0, sizeof(cp)); - cp.enable = 1; + cp.enable = LE_SCAN_ENABLE; cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3b2c0e07b25..b93cd2eb5d5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -964,7 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, return; switch (cp->enable) { - case LE_SCANNING_ENABLED: + case LE_SCAN_ENABLE: if (status) { hci_dev_lock(hdev); mgmt_start_discovery_failed(hdev, status); @@ -979,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_dev_unlock(hdev); break; - case LE_SCANNING_DISABLED: + case LE_SCAN_DISABLE: if (status) { hci_dev_lock(hdev); mgmt_stop_discovery_failed(hdev, status); -- cgit v1.2.3-70-g09d2 From 0e280af026a5662ffd57c4e623b822df1f7f47ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Apr 2013 06:52:51 +0000 Subject: tcp: introduce TCPSpuriousRtxHostQueues SNMP counter Host queues (Qdisc + NIC) can hold packets so long that TCP can eventually retransmit a packet before the first transmit even left the host. Its not clear right now if we could avoid this in the first place : - We could arm RTO timer not at the time we enqueue packets, but at the time we TX complete them (tcp_wfree()) - Cancel the sending of the new copy of the packet if prior one is still in queue. This patch adds instrumentation so that we can at least see how often this problem happens. TCPSpuriousRtxHostQueues SNMP counter is incremented every time we detect the fast clone is not yet freed in tcp_transmit_skb() Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Tom Herbert Cc: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_output.c | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index e00013a1deb..fefdec91c68 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -247,6 +247,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b6f2ea17489..6da51d55d03 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -269,6 +269,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), + SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d1269435354..5f28131eb37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -846,6 +846,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, __net_timestamp(skb); if (likely(clone_it)) { + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else -- cgit v1.2.3-70-g09d2 From 07dc93dd14957dc1faba08f0aadd27b082e35ba2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Apr 2013 10:14:51 +0300 Subject: Bluetooth: Fix HCI command send functions to use const specifier All HCI command send functions that take a pointer to the command parameters do not need to modify the content in any way (they merely copy the data to an skb). Therefore, the parameter type should be declared const. This also allows passing already const parameters to these APIs which previously would have generated a compiler warning. 
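As a concrete example of what the const qualifier enables, a request builder can now hand a read-only parameter table straight to hci_req_add() without a cast. The snippet is a hedged illustration only; the opcode and mask values are chosen arbitrarily for the example.

	static void send_fixed_event_mask(struct hci_request *req)
	{
		/* Read-only data: with the old non-const prototypes, passing
		 * this directly would have produced a qualifier warning. */
		static const u8 events[8] = { 0xff, 0xff, 0xfb, 0xff,
					      0x00, 0x00, 0x00, 0x00 };

		hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
	}
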
Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 14 ++++++++------ net/bluetooth/hci_core.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 80d718a9b31..35a57cd1704 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1081,17 +1081,19 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, - u8 event); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout); + const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u8 event, u32 timeout); + const void *param, u8 event, u32 timeout); -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param); +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, + const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ce82265f561..215db0801a6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -134,7 +134,7 @@ failed: } struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u8 event, u32 timeout) + const void *param, u8 event, u32 timeout) { DECLARE_WAITQUEUE(wait, current); struct hci_request req; @@ -188,7 +188,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_sync_ev); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - void *param, u32 timeout) + const void *param, u32 timeout) { return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); } @@ -2602,7 +2602,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) } static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, void *param) + u32 plen, const void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; @@ -2628,7 +2628,8 @@ static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, } /* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, + const void *param) { struct sk_buff *skb; @@ -2652,8 +2653,8 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) } /* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, - u8 event) +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -2682,7 +2683,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void *param, skb_queue_tail(&req->cmd_q, skb); } 
-void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param) +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) { hci_req_add_ev(req, opcode, plen, param, 0); } -- cgit v1.2.3-70-g09d2 From f646968f8f7c624587de729115d802372b9063dd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Apr 2013 02:04:27 +0000 Subject: net: vlan: rename NETIF_F_HW_VLAN_* feature flags to NETIF_F_HW_VLAN_CTAG_* Rename the hardware VLAN acceleration features to include "CTAG" to indicate that they only support CTAGs. Follow up patches will introduce 802.1ad server provider tagging (STAGs) and require the distinction for hardware not supporting acclerating both. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/infiniband/hw/nes/nes_nic.c | 14 ++++++------ drivers/net/bonding/bond_main.c | 6 +++--- drivers/net/ethernet/3com/typhoon.c | 4 ++-- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 4 ++-- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 22 +++++++++---------- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 14 ++++++------ drivers/net/ethernet/atheros/atlx/atl1.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl2.c | 16 +++++++------- drivers/net/ethernet/atheros/atlx/atlx.c | 10 ++++----- drivers/net/ethernet/broadcom/bnx2.c | 10 ++++----- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 13 +++++------ drivers/net/ethernet/chelsio/cxgb/sge.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 20 +++++++++-------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++----- .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 15 +++++++------ drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- drivers/net/ethernet/freescale/gianfar.c | 15 +++++++------ drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +- drivers/net/ethernet/ibm/ehea/ehea_main.c | 8 +++---- drivers/net/ethernet/intel/e1000/e1000_main.c | 16 +++++++------- drivers/net/ethernet/intel/e1000e/netdev.c | 10 ++++----- drivers/net/ethernet/intel/igb/igb_main.c | 18 ++++++++-------- drivers/net/ethernet/intel/igbvf/netdev.c | 6 +++--- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 14 ++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +++--- drivers/net/ethernet/jme.c | 4 ++-- drivers/net/ethernet/marvell/sky2.c | 9 ++++---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 7 +++--- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++-- drivers/net/ethernet/nvidia/forcedeth.c | 20 +++++++++-------- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 ++-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 16 +++++++------- drivers/net/ethernet/realtek/8139cp.c | 6 +++--- drivers/net/ethernet/realtek/r8169.c | 14 ++++++------ drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 6 +++--- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/toshiba/spider_net.c | 4 
++-- drivers/net/ethernet/via/via-rhine.c | 5 +++-- drivers/net/ethernet/via/via-velocity.c | 7 +++--- drivers/net/ethernet/xilinx/ll_temac_main.c | 6 +++--- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ifb.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/team/team.c | 6 +++--- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/veth.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 11 +++++----- drivers/net/vmxnet3/vmxnet3_ethtool.c | 5 +++-- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 6 +++--- drivers/scsi/fcoe/fcoe.c | 2 +- include/linux/if_vlan.h | 4 ++-- include/linux/netdev_features.h | 12 +++++------ include/linux/netdevice.h | 6 ++++-- net/8021q/vlan.c | 6 +++--- net/8021q/vlan_core.c | 4 ++-- net/8021q/vlan_dev.c | 2 +- net/bridge/br_device.c | 4 ++-- net/bridge/br_vlan.c | 6 +++--- net/core/dev.c | 9 ++++---- net/core/ethtool.c | 25 +++++++++++----------- net/core/netpoll.c | 2 +- net/openvswitch/vport-internal_dev.c | 2 +- 78 files changed, 285 insertions(+), 266 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 85cf4d1ac44..49eb5111d2c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1599,7 +1599,7 @@ static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, /* Enable/Disable VLAN Stripping */ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) u32temp &= 0xfdffffff; else u32temp |= 0x02000000; @@ -1614,10 +1614,10 @@ static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_feat * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1628,7 +1628,7 @@ static int nes_set_features(struct net_device *netdev, netdev_features_t feature struct nes_device *nesdev = nesvnic->nesdev; u32 changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) nes_vlan_mode(netdev, nesdev, features); return 0; @@ -1706,11 +1706,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->dev_addr[4] = (u8)(u64temp>>8); netdev->dev_addr[5] = (u8)u64temp; - netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; + netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) netdev->hw_features |= NETIF_F_TSO; - netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2aac890320c..8d324f8a175 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4322,9 +4322,9 @@ static void bond_setup(struct net_device *bond_dev) */ bond_dev->hw_features = BOND_VLAN_FEATURES | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->features |= bond_dev->hw_features; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 27aaaf99e73..839a9621374 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2445,9 +2445,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * settings -- so we only allow the user to toggle the TX processing. 
*/ dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; dev->features = dev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; if(register_netdev(dev) < 0) { err_msg = "unable to register netdev"; diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 549b7750057..365865130f7 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -702,7 +702,7 @@ static int starfire_init_one(struct pci_dev *pdev, #endif /* ZEROCOPY */ #ifdef VLAN_SUPPORT - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; #endif /* VLAN_RX_KILL_VID */ #ifdef ADDR_64BITS dev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index c0bc41a784c..a7689d93105 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -472,7 +472,7 @@ static int acenic_probe_one(struct pci_dev *pdev, ap->name = pci_name(pdev); dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->watchdog_timeo = 5*HZ; diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 42d4e6ad58a..6bad84d6e2c 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1869,7 +1869,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX ; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX ; #endif lp = netdev_priv(dev); @@ -1907,7 +1907,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif /* Probe the external PHY */ amd8111e_probe_ext_phy(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 1f07fc633ab..3565255cdd8 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -417,7 +417,7 @@ static void atl1c_set_multi(struct net_device *netdev) static void __atl1c_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *mac_ctrl_data |= MAC_CTRL_RMV_VLAN; } else { @@ -494,10 +494,10 @@ static netdev_features_t atl1c_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; if (netdev->mtu > MAX_TSO_FRAME_SIZE) features &= ~(NETIF_F_TSO | NETIF_F_TSO6); @@ -510,7 +510,7 @@ static int atl1c_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl1c_vlan_mode(netdev, features); return 0; @@ -2475,13 +2475,13 @@ static int atl1c_init_netdev(struct net_device *netdev, struct pci_dev *pdev) atl1c_set_ethtool_ops(netdev); /* TODO: add when ready */ - netdev->hw_features = NETIF_F_SG | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_RX | - NETIF_F_TSO | + netdev->hw_features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_TSO | NETIF_F_TSO6; - netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; return 0; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index d058d0061ed..598a6115166 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -315,7 +315,7 @@ static void atl1e_set_multi(struct net_device *netdev) static void __atl1e_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *mac_ctrl_data |= MAC_CTRL_RMV_VLAN; } else { @@ -378,10 +378,10 @@ static netdev_features_t atl1e_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -391,7 +391,7 @@ static int atl1e_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl1e_vlan_mode(netdev, features); return 0; @@ -2198,9 +2198,9 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) atl1e_set_ethtool_ops(netdev); netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features | NETIF_F_LLTX | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; return 0; } diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 8338013ab33..fd7d85044e4 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -3018,10 +3018,10 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features = NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SG; - netdev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); netdev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_TSO | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_RX; /* is this valid? 
see atl1_setup_mac_ctrl() */ netdev->features |= NETIF_F_RXCSUM; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index a046b6ff847..6b2c08a89b7 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -363,7 +363,7 @@ static inline void atl2_irq_disable(struct atl2_adapter *adapter) static void __atl2_vlan_mode(netdev_features_t features, u32 *ctrl) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *ctrl |= MAC_CTRL_RMV_VLAN; } else { @@ -399,10 +399,10 @@ static netdev_features_t atl2_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -412,7 +412,7 @@ static int atl2_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl2_vlan_mode(netdev, features); return 0; @@ -887,7 +887,7 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, skb->len-copy_len); offset = ((u32)(skb->len-copy_len + 3) & ~3); } -#ifdef NETIF_F_HW_VLAN_TX +#ifdef NETIF_F_HW_VLAN_CTAG_TX if (vlan_tx_tag_present(skb)) { u16 vlan_tag = vlan_tx_tag_get(skb); vlan_tag = (vlan_tag << 4) | @@ -1413,8 +1413,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -EIO; - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_RX; - netdev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); + netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX; + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); /* Init PHY as early as possible due to power saving issue */ atl2_phy_init(&adapter->hw); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index f82eb169946..46a622cceee 100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -220,7 +220,7 @@ static void atlx_link_chg_task(struct work_struct *work) static void __atlx_vlan_mode(netdev_features_t features, u32 *ctrl) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *ctrl |= MAC_CTRL_RMV_VLAN; } else { @@ -257,10 +257,10 @@ static netdev_features_t atlx_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -270,7 +270,7 @@ static int atlx_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atlx_vlan_mode(netdev, features); return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index f27b549b692..42a8bc8df5d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3553,7 +3553,7 @@ bnx2_set_rx_mode(struct net_device *dev) rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS | BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG); sort_mode = 1 | BNX2_RPM_SORT_USER0_BC_EN; - if (!(dev->features & NETIF_F_HW_VLAN_RX) && + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX) && (bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)) rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG; if (dev->flags & IFF_PROMISC) { @@ -7695,7 +7695,7 @@ bnx2_fix_features(struct net_device *dev, netdev_features_t features) struct bnx2 *bp = netdev_priv(dev); if (!(bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)) - features |= NETIF_F_HW_VLAN_RX; + features |= NETIF_F_HW_VLAN_CTAG_RX; return features; } @@ -7706,12 +7706,12 @@ bnx2_set_features(struct net_device *dev, netdev_features_t features) struct bnx2 *bp = netdev_priv(dev); /* TSO with VLAN tag won't work with current firmware */ - if (features & NETIF_F_HW_VLAN_TX) + if (features & NETIF_F_HW_VLAN_CTAG_TX) dev->vlan_features |= (dev->hw_features & NETIF_F_ALL_TSO); else dev->vlan_features &= ~NETIF_F_ALL_TSO; - if ((!!(features & NETIF_F_HW_VLAN_RX) != + if ((!!(features & NETIF_F_HW_VLAN_CTAG_RX) != !!(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG)) && netif_running(dev)) { bnx2_netif_stop(bp, false); @@ -8551,7 +8551,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; dev->vlan_features = dev->hw_features; - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= dev->hw_features; dev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index fdfe33bc097..1e60c5d139d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12027,7 +12027,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO | - NETIF_F_RXHASH | NETIF_F_HW_VLAN_TX; + NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; if (!CHIP_IS_E1x(bp)) { dev->hw_features |= NETIF_F_GSO_GRE; dev->hw_enc_features = @@ -12039,7 +12039,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; - dev->features |= dev->hw_features | NETIF_F_HW_VLAN_RX; + dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; if (bp->flags & USING_DAC_FLAG) dev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 
45719ddfde7..0c22c9a059c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17197,7 +17197,7 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_init_bufmgr_config(tp); - features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; /* 5700 B0 chips do not support checksumming correctly due * to hardware bugs. diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index d588f842d55..1d9d0371a74 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3170,14 +3170,14 @@ bnad_netdev_init(struct bnad *bnad, bool using_dac) netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_TX; netdev->vlan_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->features |= netdev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; if (using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 20d2085f61c..9624cfe7df5 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -856,10 +856,10 @@ static netdev_features_t t1_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -869,7 +869,7 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features) netdev_features_t changed = dev->features ^ features; struct adapter *adapter = dev->ml_priv; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) t1_vlan_mode(adapter, features); return 0; @@ -1085,8 +1085,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= NETIF_F_HIGHDMA; if (vlan_tso_capable(adapter)) { netdev->features |= - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - netdev->hw_features |= NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; /* T204: disable TSO */ if (!(is_T2(adapter)) || bi->port_number != 4) { diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 55fe8c9f048..f85e0659432 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -734,7 +734,7 @@ void t1_vlan_mode(struct adapter *adapter, netdev_features_t features) { struct sge *sge = adapter->sge; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) sge->sge_control |= F_VLAN_XTRACT; else sge->sge_control &= ~F_VLAN_XTRACT; diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 2b5e62193ce..71497e835f4 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1181,14 +1181,15 @@ static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t 
features) if (adapter->params.rev > 0) { t3_set_vlan_accel(adapter, 1 << pi->port_id, - features & NETIF_F_HW_VLAN_RX); + features & NETIF_F_HW_VLAN_CTAG_RX); } else { /* single control for all ports */ - unsigned int i, have_vlans = features & NETIF_F_HW_VLAN_RX; + unsigned int i, have_vlans = features & NETIF_F_HW_VLAN_CTAG_RX; for_each_port(adapter, i) have_vlans |= - adapter->port[i]->features & NETIF_F_HW_VLAN_RX; + adapter->port[i]->features & + NETIF_F_HW_VLAN_CTAG_RX; t3_set_vlan_accel(adapter, 1, have_vlans); } @@ -2563,10 +2564,10 @@ static netdev_features_t cxgb_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -2575,7 +2576,7 @@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features) { netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) cxgb_vlan_mode(dev, features); return 0; @@ -3288,8 +3289,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len - 1; netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; - netdev->features |= netdev->hw_features | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; + netdev->features |= netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; netdev->vlan_features |= netdev->features & VLAN_FEAT; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e76cf035100..6a6a01af75f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -559,7 +559,7 @@ static int link_start(struct net_device *dev) * that step explicitly. 
*/ ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1, - !!(dev->features & NETIF_F_HW_VLAN_RX), true); + !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); if (ret == 0) { ret = t4_change_mac(pi->adapter, mb, pi->viid, pi->xact_addr_filt, dev->dev_addr, true, @@ -2722,14 +2722,14 @@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features) netdev_features_t changed = dev->features ^ features; int err; - if (!(changed & NETIF_F_HW_VLAN_RX)) + if (!(changed & NETIF_F_HW_VLAN_CTAG_RX)) return 0; err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1, -1, -1, -1, - !!(features & NETIF_F_HW_VLAN_RX), true); + !!(features & NETIF_F_HW_VLAN_CTAG_RX), true); if (unlikely(err)) - dev->features = features ^ NETIF_F_HW_VLAN_RX; + dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX; return err; } @@ -5628,7 +5628,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 7fcac200376..73aef76a526 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1100,10 +1100,10 @@ static netdev_features_t cxgb4vf_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1114,9 +1114,9 @@ static int cxgb4vf_set_features(struct net_device *dev, struct port_info *pi = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, - features & NETIF_F_HW_VLAN_TX, 0); + features & NETIF_F_HW_VLAN_CTAG_TX, 0); return 0; } @@ -2623,11 +2623,12 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; netdev->vlan_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA; - netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index ec1a233622c..05c1e59b6bf 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2496,9 +2496,9 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->watchdog_timeo = 2 * HZ; netdev->ethtool_ops = &enic_ethtool_ops; - netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (ENIC_SETTING(enic, LOOP)) { - netdev->features &= ~NETIF_F_HW_VLAN_TX; + 
netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX; enic->loop_enable = 1; enic->loop_tag = enic->config.loop_tag; dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 536afa2fb94..bde26d4d52e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3663,12 +3663,12 @@ static void be_netdev_init(struct net_device *netdev) netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; if (be_multi_rxq(adapter)) netdev->hw_features |= NETIF_F_RXHASH; netdev->features |= netdev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 96fbe354824..51555445ce2 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -386,7 +386,7 @@ static void gfar_init_mac(struct net_device *ndev) priv->uses_rxfcb = 1; } - if (ndev->features & NETIF_F_HW_VLAN_RX) { + if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) { rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; priv->uses_rxfcb = 1; } @@ -1050,8 +1050,9 @@ static int gfar_probe(struct platform_device *ofdev) } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - dev->features |= NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { @@ -2348,7 +2349,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features) local_irq_save(flags); lock_rx_qs(priv); - if (features & NETIF_F_HW_VLAN_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) { /* Enable VLAN tag insertion */ tempval = gfar_read(®s->tctrl); tempval |= TCTRL_VLINS; @@ -2360,7 +2361,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features) gfar_write(®s->tctrl, tempval); } - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* Enable VLAN tag extraction */ tempval = gfar_read(®s->rctrl); tempval |= (RCTRL_VLEX | RCTRL_PRSDEP_INIT); @@ -2724,11 +2725,11 @@ static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, /* Tell the skb what kind of packet this is */ skb->protocol = eth_type_trans(skb, dev); - /* There's need to check for NETIF_F_HW_VLAN_RX here. + /* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here. * Even if vlan rx accel is disabled, on some chips * RXFCB_VLN is pseudo randomly set. 
*/ - if (dev->features & NETIF_F_HW_VLAN_RX && + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX && fcb->flags & RXFCB_VLN) __vlan_hwaccel_put_tag(skb, fcb->vlctl); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 083603f6bec..21cd88124ca 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -542,7 +542,7 @@ int gfar_set_features(struct net_device *dev, netdev_features_t features) int err = 0, i = 0; netdev_features_t changed = dev->features ^ features; - if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) gfar_vlan_mode(dev, features); if (!(changed & NETIF_F_RXCSUM)) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 02963343447..9c9fa745ff8 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3021,11 +3021,11 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, ehea_set_ethtool_ops(dev); dev->hw_features = NETIF_F_SG | NETIF_F_TSO - | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX; + | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO - | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX - | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER - | NETIF_F_RXCSUM; + | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | + | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM; dev->vlan_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM; dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index d98e1d0996d..8d0d0d420c2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -809,10 +809,10 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev, /* Since there is no support for separate Rx/Tx vlan accel * enable/disable make sure Tx flag is always in same state as Rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -823,7 +823,7 @@ static int e1000_set_features(struct net_device *netdev, struct e1000_adapter *adapter = netdev_priv(netdev); netdev_features_t changed = features ^ netdev->features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) e1000_vlan_mode(netdev, features); if (!(changed & (NETIF_F_RXCSUM | NETIF_F_RXALL))) @@ -1058,9 +1058,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac_type >= e1000_82543) { netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_RX; - netdev->features = NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX; + netdev->features = NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER; } if ((hw->mac_type >= e1000_82544) && @@ -4785,7 +4785,7 @@ static void __e1000_vlan_mode(struct e1000_adapter *adapter, u32 ctrl; ctrl = er32(CTRL); - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ ctrl |= E1000_CTRL_VME; } else { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b18fad5b579..a2e7db33bf9 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3373,7 +3373,7 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); @@ -6418,7 +6418,7 @@ static int e1000_set_features(struct net_device *netdev, if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) adapter->flags |= FLAG_TSO_FORCE; - if (!(changed & (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | + if (!(changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_RXFCS | NETIF_F_RXALL))) return 0; @@ -6629,8 +6629,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Set initial default active device features */ netdev->features = (NETIF_F_SG | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | @@ -6644,7 +6644,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXALL; if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) - netdev->features |= NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= (NETIF_F_SG | NETIF_F_TSO | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 38590252be6..b0b1777c0af 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1860,10 +1860,10 @@ static netdev_features_t igb_fix_features(struct net_device *netdev, /* Since there is no support for separate Rx/Tx vlan accel * enable/disable make sure Tx flag is always in same state as Rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1874,7 +1874,7 @@ static int igb_set_features(struct net_device *netdev, netdev_features_t changed = netdev->features ^ features; struct igb_adapter *adapter = netdev_priv(netdev); - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); if (!(changed & NETIF_F_RXALL)) @@ -2127,15 +2127,15 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features |= NETIF_F_RXALL; /* set this bit last since it cannot be part of hw_features */ - netdev->features |= NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6 | @@ -6674,7 +6674,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, igb_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); - if ((dev->features & NETIF_F_HW_VLAN_RX) && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { u16 vid; if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && @@ -6954,7 +6954,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl; - bool enable = !!(features & NETIF_F_HW_VLAN_RX); + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); if (enable) { /* enable VLAN tag insert/strip */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index bea46bb2606..33e7b3069fb 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2722,9 +2722,9 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_RXCSUM; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5dc119fd95a..e65d9e91022 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -332,8 +332,8 @@ ixgb_fix_features(struct net_device *netdev, netdev_features_t features) * Tx VLAN insertion does not work per HW design when Rx stripping is * disabled. 
*/ - if (!(features & NETIF_F_HW_VLAN_RX)) - features &= ~NETIF_F_HW_VLAN_TX; + if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -344,7 +344,7 @@ ixgb_set_features(struct net_device *netdev, netdev_features_t features) struct ixgb_adapter *adapter = netdev_priv(netdev); netdev_features_t changed = features ^ netdev->features; - if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_RX))) + if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_CTAG_RX))) return 0; adapter->rx_csum = !!(features & NETIF_F_RXCSUM); @@ -479,10 +479,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_RXCSUM; if (pci_using_dac) { @@ -1140,7 +1140,7 @@ ixgb_set_multi(struct net_device *netdev) } alloc_failed: - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ixgb_vlan_strip_enable(adapter); else ixgb_vlan_strip_disable(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c022f9c417a..0316b65dfe0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1488,7 +1488,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); - if ((dev->features & NETIF_F_HW_VLAN_RX) && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); __vlan_hwaccel_put_tag(skb, vid); @@ -3722,7 +3722,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ixgbe_vlan_strip_enable(adapter); else ixgbe_vlan_strip_disable(adapter); @@ -7024,7 +7024,7 @@ static int ixgbe_set_features(struct net_device *netdev, break; } - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) ixgbe_vlan_strip_enable(adapter); else ixgbe_vlan_strip_disable(adapter); @@ -7431,9 +7431,9 @@ skip_sriov: netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index b3e6530637e..2d4bdcc4fdb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -35,7 +35,7 @@ #include #include #include -#ifdef NETIF_F_HW_VLAN_TX +#ifdef NETIF_F_HW_VLAN_CTAG_TX #include #endif diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index eeae9349f78..8f907b7af31 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3410,9 +3410,9 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_RXCSUM; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + 
NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 0519afa413d..d28ce6f9717 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -3030,8 +3030,8 @@ jme_init_one(struct pci_dev *pdev, NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; if (using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 6a0e671fcec..bf9da1b7b09 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1421,14 +1421,14 @@ static void sky2_vlan_mode(struct net_device *dev, netdev_features_t features) struct sky2_hw *hw = sky2->hw; u16 port = sky2->port; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); else sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); - if (features & NETIF_F_HW_VLAN_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) { sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); @@ -4406,7 +4406,7 @@ static int sky2_set_features(struct net_device *dev, netdev_features_t features) if (changed & NETIF_F_RXHASH) rx_set_rss(dev, features); - if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) sky2_vlan_mode(dev, features); return 0; @@ -4793,7 +4793,8 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, dev->hw_features |= NETIF_F_RXHASH; if (!(hw->flags & SKY2_HW_VLAN_BROKEN)) { - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features |= SKY2_VLAN_OFFLOADS; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d2a4f919bf1..b2ba39c7143 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2082,8 +2082,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH; dev->features = dev->hw_features | NETIF_F_HIGHDMA | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features |= NETIF_F_LOOPBACK; if (mdev->dev->caps.steering_mode == diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index d5ffdc8264e..46262ea610f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1281,7 +1281,8 @@ myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb) va = addr; va += MXGEFW_PAD; veh = (struct vlan_ethhdr *)va; - if ((dev->features & NETIF_F_HW_VLAN_RX) == NETIF_F_HW_VLAN_RX && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) == + NETIF_F_HW_VLAN_CTAG_RX && veh->h_vlan_proto == htons(ETH_P_8021Q)) { /* fixup csum if needed */ if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -3887,8 +3888,8 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->mtu = myri10ge_initial_mtu; netdev->hw_features = mgp->features | NETIF_F_RXCSUM; - /* fake 
NETIF_F_HW_VLAN_RX for good GRO performance */ - netdev->hw_features |= NETIF_F_HW_VLAN_RX; + /* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */ + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 77c070de621..60267e91cbd 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -2193,7 +2193,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, #ifdef NS83820_VLAN_ACCEL_SUPPORT /* We also support hardware vlan acceleration */ - ndev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + ndev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif if (using_dac) { diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 3371ff41bb3..ec82d59b03e 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7920,7 +7920,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO; dev->features |= dev->hw_features | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (sp->device_type & XFRAME_II_DEVICE) { dev->hw_features |= NETIF_F_UFO; if (ufo) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 794444e0949..e9e58aadf87 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3415,12 +3415,12 @@ static int vxge_device_register(struct __vxge_hw_device *hldev, ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; if (vdev->config.rth_steering != NO_STEERING) ndev->hw_features |= NETIF_F_RXHASH; ndev->features |= ndev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &vxge_netdev_ops; diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 5ae12471979..fcad64081d7 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2961,11 +2961,11 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) vlanflags = le32_to_cpu(np->get_rx.ex->buflow); /* - * There's need to check for NETIF_F_HW_VLAN_RX here. - * Even if vlan rx accel is disabled, + * There's need to check for NETIF_F_HW_VLAN_CTAG_RX + * here. Even if vlan rx accel is disabled, * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set. 
*/ - if (dev->features & NETIF_F_HW_VLAN_RX && + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX && vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; @@ -4816,7 +4816,7 @@ static netdev_features_t nv_fix_features(struct net_device *dev, netdev_features_t features) { /* vlan is dependent on rx checksum offload */ - if (features & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (features & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) features |= NETIF_F_RXCSUM; return features; @@ -4828,12 +4828,12 @@ static void nv_vlan_mode(struct net_device *dev, netdev_features_t features) spin_lock_irq(&np->lock); - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP; else np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; - if (features & NETIF_F_HW_VLAN_TX) + if (features & NETIF_F_HW_VLAN_CTAG_TX) np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS; else np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; @@ -4870,7 +4870,7 @@ static int nv_set_features(struct net_device *dev, netdev_features_t features) spin_unlock_irq(&np->lock); } - if (changed & (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX)) nv_vlan_mode(dev, features); return 0; @@ -5705,7 +5705,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) np->vlanctl_bits = 0; if (id->driver_data & DEV_HAS_VLAN) { np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; - dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; } dev->features |= dev->hw_features; @@ -5996,7 +5997,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) dev->features & NETIF_F_HIGHDMA ? "highdma " : "", dev->features & (NETIF_F_IP_CSUM | NETIF_F_SG) ? "csum " : "", - dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX) ? + dev->features & (NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX) ? "vlan " : "", dev->features & (NETIF_F_LOOPBACK) ? 
"loopback " : "", diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 7867aebc05f..af951f343ff 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1345,7 +1345,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter, } if (adapter->capabilities & NX_FW_CAPABILITY_FVLANTX) - netdev->hw_features |= NETIF_F_HW_VLAN_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; if (adapter->capabilities & NX_FW_CAPABILITY_HW_LRO) netdev->hw_features |= NETIF_F_LRO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 0d00b2bd2c8..845ba1d1c3c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -1714,7 +1714,7 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO | - NETIF_F_HW_VLAN_RX); + NETIF_F_HW_VLAN_CTAG_RX); netdev->vlan_features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); @@ -1729,7 +1729,7 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, } if (qlcnic_vlan_tx_check(adapter)) - netdev->features |= (NETIF_F_HW_VLAN_TX); + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX); if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO) netdev->features |= NETIF_F_LRO; diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 1dd778a6f01..8e3f43c7566 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -409,7 +409,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type, (qdev-> func << CAM_OUT_FUNC_SHIFT) | (0 << CAM_OUT_CQ_ID_SHIFT)); - if (qdev->ndev->features & NETIF_F_HW_VLAN_RX) + if (qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) cam_output |= CAM_OUT_RV; /* route to NIC core */ ql_write32(qdev, MAC_ADDR_DATA, cam_output); @@ -2279,7 +2279,7 @@ static void qlge_vlan_mode(struct net_device *ndev, netdev_features_t features) { struct ql_adapter *qdev = netdev_priv(ndev); - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { ql_write32(qdev, NIC_RCV_CFG, NIC_RCV_CFG_VLAN_MASK | NIC_RCV_CFG_VLAN_MATCH_AND_NON); } else { @@ -2294,10 +2294,10 @@ static netdev_features_t qlge_fix_features(struct net_device *ndev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -2307,7 +2307,7 @@ static int qlge_set_features(struct net_device *ndev, { netdev_features_t changed = ndev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); return 0; @@ -4665,9 +4665,9 @@ static int qlge_probe(struct pci_dev *pdev, SET_NETDEV_DEV(ndev, &pdev->dev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | - NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXCSUM; ndev->features = ndev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->vlan_features = ndev->hw_features; if (test_bit(QL_DMA64, &qdev->flags)) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index b62a32484f6..6d03b52e56f 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1438,7 +1438,7 @@ static int cp_set_features(struct net_device *dev, netdev_features_t features) else cp->cpcmd &= ~RxChkSum; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) cp->cpcmd |= RxVlanOn; else cp->cpcmd &= ~RxVlanOn; @@ -1955,14 +1955,14 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &cp_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; /* disabled by default until verified */ dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9a1bc1a2385..86d5d7909d1 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1793,16 +1793,17 @@ static void __rtl8169_set_features(struct net_device *dev, netdev_features_t changed = features ^ dev->features; void __iomem *ioaddr = tp->mmio_addr; - if (!(changed & (NETIF_F_RXALL | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX))) + if (!(changed & (NETIF_F_RXALL | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_RX))) return; - if (changed & (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX)) { + if (changed & (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_RXCSUM) tp->cp_cmd |= RxChkSum; else tp->cp_cmd &= ~RxChkSum; - if (dev->features & NETIF_F_HW_VLAN_RX) + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) tp->cp_cmd |= RxVlan; else tp->cp_cmd &= ~RxVlan; @@ -7036,16 +7037,17 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* don't enable SG, IP_CSUM and TSO by default - it might not work * properly for all devices */ dev->features |= NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | 
NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; if (tp->mac_version == RTL_GIGA_MAC_VER_05) /* 8110SCd requires hardware Rx VLAN - disallow toggling */ - dev->hw_features &= ~NETIF_F_HW_VLAN_RX; + dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a7499cbf450..3d4a1ed0a7a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2749,7 +2749,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) goto out_release; } mdp->port = devno % 2; - ndev->features = NETIF_F_HW_VLAN_FILTER; + ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } /* initialize first or needed device */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 71b64857e3a..618446ae1ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2679,7 +2679,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, ndev->watchdog_timeo = msecs_to_jiffies(watchdog); #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ - ndev->features |= NETIF_F_HW_VLAN_RX; + ndev->features |= NETIF_F_HW_VLAN_CTAG_RX; #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index e8824cea093..5ca4b33fc4c 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2017,12 +2017,12 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * so we can have them same for all ports of the board */ ndev->if_port = port; ndev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO - | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER | NETIF_F_RXCSUM + | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM /*| NETIF_F_FRAGLIST */ ; ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_HW_VLAN_CTAG_TX; if (pci_using_dac) ndev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1d740423a05..084992981ce 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1599,7 +1599,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, priv_sl2->num_irqs = priv->num_irqs; } - ndev->features |= NETIF_F_HW_VLAN_FILTER; + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); @@ -1837,7 +1837,7 @@ static int cpsw_probe(struct platform_device *pdev) k++; } - ndev->features |= NETIF_F_HW_VLAN_FILTER; + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index fef6b59e69c..c655fe60121 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2329,8 +2329,8 @@ spider_net_setup_netdev(struct spider_net_card *card) if (SPIDER_NET_RX_CSUM_DEFAULT) netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; - /* some time: NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - * NETIF_F_HW_VLAN_FILTER */ + /* some time: 
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + * NETIF_F_HW_VLAN_CTAG_FILTER */ netdev->irq = card->pdev->irq; card->num_rx_ints = 0; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 185c721c52d..37b02c3768b 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1026,8 +1026,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; if (pdev->revision >= VT6105M) - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; /* dev->name not defined before register_netdev()! */ rc = register_netdev(dev); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 1bc7f9fd258..c1c55a7da94 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2810,9 +2810,10 @@ static int velocity_found1(struct pci_dev *pdev, dev->ethtool_ops = &velocity_ethtool_ops; netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT); - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HW_VLAN_TX; - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER | - NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM; + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_TX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_IP_CSUM; ret = register_netdev(dev); if (ret < 0) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 4a7c60f4c83..57c2e5ef280 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1018,9 +1018,9 @@ static int temac_of_probe(struct platform_device *op) ndev->features |= NETIF_F_HW_CSUM; /* Can checksum all the packets. */ ndev->features |= NETIF_F_IPV6_CSUM; /* Can checksum IPV6 TCP/UDP */ ndev->features |= NETIF_F_HIGHDMA; /* Can DMA to high memory. */ - ndev->features |= NETIF_F_HW_VLAN_TX; /* Transmit VLAN hw accel */ - ndev->features |= NETIF_F_HW_VLAN_RX; /* Receive VLAN hw acceleration */ - ndev->features |= NETIF_F_HW_VLAN_FILTER; /* Receive VLAN filtering */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; /* Transmit VLAN hw accel */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_RX; /* Receive VLAN hw acceleration */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; /* Receive VLAN filtering */ ndev->features |= NETIF_F_VLAN_CHALLENGED; /* cannot handle VLAN pkts */ ndev->features |= NETIF_F_GSO; /* Enable software GSO. 
*/ ndev->features |= NETIF_F_MULTI_QUEUE; /* Has multiple TX/RX queues */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5f85205cd12..4559bb8115b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -431,7 +431,7 @@ static int netvsc_probe(struct hv_device *dev, /* TODO: Add GSO and Checksum offload */ net->hw_features = NETIF_F_SG; - net->features = NETIF_F_SG | NETIF_F_HW_VLAN_TX; + net->features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_TX; SET_ETHTOOL_OPS(net, ðtool_ops); SET_NETDEV_DEV(net, &dev->device); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 82164381f77..724ce7a36c9 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -166,7 +166,7 @@ static const struct net_device_ops ifb_netdev_ops = { #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6 | \ - NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX) + NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX) static void ifb_setup(struct net_device *dev) { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 70af6dc07d4..fedd34cff91 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -471,7 +471,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ - NETIF_F_HW_VLAN_FILTER) + NETIF_F_HW_VLAN_CTAG_FILTER) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9a31e8e50fa..9290eb23d66 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1841,9 +1841,9 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GRO; dev->hw_features = TEAM_VLAN_FEATURES | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 16c84299729..b7e9cd7ca6d 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -101,7 +101,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->flags |= IFF_NOARP; /* no need to put the VLAN tci in the packet headers */ - dev->net->features |= NETIF_F_HW_VLAN_TX; + dev->net->features |= NETIF_F_HW_VLAN_CTAG_TX; err: return ret; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 07a4af0aa3d..f116c593d87 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -255,7 +255,7 @@ static const struct net_device_ops veth_netdev_ops = { #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX) + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX) static void veth_setup(struct net_device *dev) { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8fdfde6832a..b61d57ab3c6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1376,7 +1376,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) if (vi->has_cvq) { vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) - vi->dev->features |= 
NETIF_F_HW_VLAN_FILTER; + vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } for (i = 0; i < vi->max_queue_pairs; i++) { diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index eae7a03d4f9..ba9bdad3998 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2107,7 +2107,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) devRead->misc.uptFeatures |= UPT1_F_LRO; devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS); } - if (adapter->netdev->features & NETIF_F_HW_VLAN_RX) + if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) devRead->misc.uptFeatures |= UPT1_F_RXVLAN; devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu); @@ -2669,14 +2669,15 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) struct net_device *netdev = adapter->netdev; netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_LRO; if (dma64) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->vlan_features = netdev->hw_features & - ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); - netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER; + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); + netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; } diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 63a124340cb..600ab56c000 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -263,7 +263,8 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) unsigned long flags; netdev_features_t changed = features ^ netdev->features; - if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_HW_VLAN_RX)) { + if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | + NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_RXCSUM) adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXCSUM; @@ -279,7 +280,7 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_LRO; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXVLAN; else diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d690166efea..90ddd823605 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -959,7 +959,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops); else SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops); - card->dev->features |= NETIF_F_HW_VLAN_FILTER; + card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); SET_NETDEV_DEV(card->dev, &card->gdev->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8710337dab3..04261bc08f2 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3294,9 +3294,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops); - card->dev->features |= NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | + 
NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; card->dev->gso_max_size = 15 * PAGE_SIZE; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 9bfdc9a3f89..292b24f9bf9 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1655,7 +1655,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp) skb->priority = fcoe->priority; if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN && - fcoe->realdev->features & NETIF_F_HW_VLAN_TX) { + fcoe->realdev->features & NETIF_F_HW_VLAN_CTAG_TX) { skb->vlan_tci = VLAN_TAG_PRESENT | vlan_dev_vlan_id(fcoe->netdev); skb->dev = fcoe->realdev; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 70962f3fdb7..fee28291a82 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -238,7 +238,7 @@ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, */ static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) { - if (skb->dev->features & NETIF_F_HW_VLAN_TX) { + if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) { return __vlan_hwaccel_put_tag(skb, vlan_tci); } else { return __vlan_put_tag(skb, vlan_tci); @@ -294,7 +294,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - if (skb->dev->features & NETIF_F_HW_VLAN_TX) { + if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) { return __vlan_hwaccel_get_tag(skb, vlan_tci); } else { return __vlan_get_tag(skb, vlan_tci); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d6ee2d008ee..785913b8983 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -22,9 +22,9 @@ enum { NETIF_F_IPV6_CSUM_BIT, /* Can checksum TCP/UDP over IPV6 */ NETIF_F_HIGHDMA_BIT, /* Can DMA to high memory. */ NETIF_F_FRAGLIST_BIT, /* Scatter/gather IO. */ - NETIF_F_HW_VLAN_TX_BIT, /* Transmit VLAN hw acceleration */ - NETIF_F_HW_VLAN_RX_BIT, /* Receive VLAN hw acceleration */ - NETIF_F_HW_VLAN_FILTER_BIT, /* Receive filtering on VLAN */ + NETIF_F_HW_VLAN_CTAG_TX_BIT, /* Transmit VLAN CTAG HW acceleration */ + NETIF_F_HW_VLAN_CTAG_RX_BIT, /* Receive VLAN CTAG HW acceleration */ + NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ @@ -80,9 +80,9 @@ enum { #define NETIF_F_GSO_ROBUST __NETIF_F(GSO_ROBUST) #define NETIF_F_HIGHDMA __NETIF_F(HIGHDMA) #define NETIF_F_HW_CSUM __NETIF_F(HW_CSUM) -#define NETIF_F_HW_VLAN_FILTER __NETIF_F(HW_VLAN_FILTER) -#define NETIF_F_HW_VLAN_RX __NETIF_F(HW_VLAN_RX) -#define NETIF_F_HW_VLAN_TX __NETIF_F(HW_VLAN_TX) +#define NETIF_F_HW_VLAN_CTAG_FILTER __NETIF_F(HW_VLAN_CTAG_FILTER) +#define NETIF_F_HW_VLAN_CTAG_RX __NETIF_F(HW_VLAN_CTAG_RX) +#define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LLTX __NETIF_F(LLTX) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 623b57b5219..7eb7e03ee41 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,11 +785,13 @@ struct netdev_fcoe_hbainfo { * neither operation. 
* * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * If device support VLAN filtering (dev->features & + * NETIF_F_HW_VLAN_CTAG_FILTER) * this function is called when a VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * If device support VLAN filtering (dev->features & + * NETIF_F_HW_VLAN_CTAG_FILTER) * this function is called when a VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 85addcd9372..d913feed075 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -301,7 +301,7 @@ static void vlan_transfer_features(struct net_device *dev, { vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_TX) + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; @@ -347,7 +347,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, __vlan_device_event(dev, event); if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_FILTER)) { + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); vlan_vid_add(dev, 0); @@ -415,7 +415,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_DOWN: - if (dev->features & NETIF_F_HW_VLAN_FILTER) + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_vid_del(dev, 0); /* Put all VLANs for this dev in the down state too. */ diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f3b6f515eba..3df29d34470 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -225,7 +225,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, if (!vid_info) return -ENOMEM; - if (dev->features & NETIF_F_HW_VLAN_FILTER) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { err = ops->ndo_vlan_rx_add_vid(dev, vid); if (err) { kfree(vid_info); @@ -282,7 +282,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, unsigned short vid = vid_info->vid; int err; - if (dev->features & NETIF_F_HW_VLAN_FILTER) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { err = ops->ndo_vlan_rx_kill_vid(dev, vid); if (err) { pr_warn("failed to kill vid %d for device %s\n", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 19cf81bf9f6..5c4892a8641 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -583,7 +583,7 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_TX) { + if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 314c73ed418..967312803e4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; + NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - 
NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 93dde75923f..0b3dbbec80d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -54,7 +54,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) dev = br->dev; } - if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { /* Add VLAN to the device filter if it is supported. * Stricly speaking, this is not necessary now, since * devices are made promiscuous by the bridge, but if @@ -82,7 +82,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) return 0; out_filt: - if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); return err; } @@ -98,7 +98,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; - if (dev->features & NETIF_F_HW_VLAN_FILTER) + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); } diff --git a/net/core/dev.c b/net/core/dev.c index 3655ff92731..07a8e9dc43f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2435,13 +2435,13 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX; return harmonize_features(skb, protocol, features); } } @@ -2482,7 +2482,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && - !(features & NETIF_F_HW_VLAN_TX)) { + !(features & NETIF_F_HW_VLAN_CTAG_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -5180,7 +5180,8 @@ int register_netdevice(struct net_device *dev) } } - if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && + if (((dev->hw_features | dev->features) & + NETIF_F_HW_VLAN_CTAG_FILTER) && (!dev->netdev_ops->ndo_vlan_rx_add_vid || !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index adc1351e687..b87712cfd26 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -60,10 +60,10 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", [NETIF_F_HIGHDMA_BIT] = "highdma", [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", + [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-ctag-hw-insert", - [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", + [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-ctag-hw-parse", + [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", [NETIF_F_LLTX_BIT] = "tx-lockless", @@ -267,18 +267,19 @@ static int ethtool_set_one_feature(struct net_device *dev, #define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | 
ETH_FLAG_TXVLAN | \ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ - NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ + NETIF_F_RXHASH) static u32 __ethtool_get_flags(struct net_device *dev) { u32 flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; return flags; } @@ -291,8 +292,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return -EINVAL; if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; + if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a3a17aed363..8de961e67cf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -383,7 +383,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { if (vlan_tx_tag_present(skb) && - !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { + !(netif_skb_features(skb) & NETIF_F_HW_VLAN_CTAG_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) break; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 9604760494b..73682de8dc6 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev) NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; - netdev->features |= NETIF_F_HW_VLAN_TX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; eth_hw_addr_random(netdev); } -- cgit v1.2.3-70-g09d2 From 80d5c3689b886308247da295a228a54df49a44f6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Apr 2013 02:04:28 +0000 Subject: net: vlan: prepare for 802.1ad VLAN filtering offload Change the rx_{add,kill}_vid callbacks to take a protocol argument in preparation of 802.1ad support. The protocol argument used so far is always htons(ETH_P_8021Q). Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 17 ++++---- drivers/net/ethernet/adaptec/starfire.c | 6 ++- drivers/net/ethernet/brocade/bna/bnad.c | 6 +-- drivers/net/ethernet/cisco/enic/enic_dev.c | 4 +- drivers/net/ethernet/cisco/enic/enic_dev.h | 4 +- drivers/net/ethernet/emulex/benet/be_main.c | 4 +- drivers/net/ethernet/ibm/ehea/ehea_main.c | 4 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 22 +++++++---- drivers/net/ethernet/intel/e1000e/netdev.c | 20 ++++++---- drivers/net/ethernet/intel/igb/igb_main.c | 12 +++--- drivers/net/ethernet/intel/igbvf/netdev.c | 8 ++-- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 12 +++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 +++-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 9 +++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 ++- drivers/net/ethernet/neterion/vxge/vxge-main.c | 8 ++-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 ++-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 4 +- drivers/net/ethernet/renesas/sh_eth.c | 6 ++- drivers/net/ethernet/tehuti/tehuti.c | 4 +- drivers/net/ethernet/ti/cpsw.c | 4 +- drivers/net/ethernet/via/via-rhine.c | 10 +++-- drivers/net/ethernet/via/via-velocity.c | 6 ++- drivers/net/macvlan.c | 8 ++-- drivers/net/team/team.c | 10 ++--- drivers/net/virtio_net.c | 6 ++- drivers/net/vmxnet3/vmxnet3_drv.c | 4 +- drivers/s390/net/qeth_l2_main.c | 6 ++- drivers/s390/net/qeth_l3_main.c | 6 ++- include/linux/if_vlan.h | 4 +- include/linux/netdevice.h | 16 ++++---- net/8021q/vlan.c | 10 ++--- net/8021q/vlan_core.c | 47 +++++++++++++---------- net/bridge/br_vlan.c | 10 +++-- 34 files changed, 184 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8d324f8a175..35e89e12a1f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -428,14 +428,15 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, * @bond_dev: bonding net device that got called * @vid: vlan id being added */ -static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_add_vid(struct net_device *bond_dev, + __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *stop_at; int i, res; bond_for_each_slave(bond, slave, i) { - res = vlan_vid_add(slave->dev, vid); + res = vlan_vid_add(slave->dev, proto, vid); if (res) goto unwind; } @@ -453,7 +454,7 @@ unwind: /* unwind from head to the slave that failed */ stop_at = slave; bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) - vlan_vid_del(slave->dev, vid); + vlan_vid_del(slave->dev, proto, vid); return res; } @@ -463,14 +464,15 @@ unwind: * @bond_dev: bonding net device that got called * @vid: vlan id being removed */ -static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, + __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int i, res; bond_for_each_slave(bond, slave, i) - vlan_vid_del(slave->dev, vid); + vlan_vid_del(slave->dev, proto, vid); res = bond_del_vlan(bond, vid); if (res) { @@ -488,7 +490,8 @@ static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *sla int res; list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - res = vlan_vid_add(slave_dev, vlan->vlan_id); + res = vlan_vid_add(slave_dev, htons(ETH_P_8021Q), + vlan->vlan_id); if (res) pr_warning("%s: Failed to add vlan id %d to device %s\n", 
bond->dev->name, vlan->vlan_id, @@ -504,7 +507,7 @@ static void bond_del_vlans_from_slave(struct bonding *bond, list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (!vlan->vlan_id) continue; - vlan_vid_del(slave_dev, vlan->vlan_id); + vlan_vid_del(slave_dev, htons(ETH_P_8021Q), vlan->vlan_id); } } diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 365865130f7..cdbc5443ae3 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -594,7 +594,8 @@ static const struct ethtool_ops ethtool_ops; #ifdef VLAN_SUPPORT -static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int netdev_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct netdev_private *np = netdev_priv(dev); @@ -608,7 +609,8 @@ static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int netdev_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct netdev_private *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 1d9d0371a74..c0bc44e51fa 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3068,8 +3068,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) } static int -bnad_vlan_rx_add_vid(struct net_device *netdev, - unsigned short vid) +bnad_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct bnad *bnad = netdev_priv(netdev); unsigned long flags; @@ -3090,8 +3089,7 @@ bnad_vlan_rx_add_vid(struct net_device *netdev, } static int -bnad_vlan_rx_kill_vid(struct net_device *netdev, - unsigned short vid) +bnad_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct bnad *bnad = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c index bf0fc56dba1..4b6e5695b26 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.c +++ b/drivers/net/ethernet/cisco/enic/enic_dev.c @@ -212,7 +212,7 @@ int enic_dev_deinit_done(struct enic *enic, int *status) } /* rtnl lock is held */ -int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct enic *enic = netdev_priv(netdev); int err; @@ -225,7 +225,7 @@ int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid) } /* rtnl lock is held */ -int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct enic *enic = netdev_priv(netdev); int err; diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h index da1cba3c410..08bded051b9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.h +++ b/drivers/net/ethernet/cisco/enic/enic_dev.h @@ -46,8 +46,8 @@ int enic_dev_packet_filter(struct enic *enic, int directed, int multicast, int broadcast, int promisc, int allmulti); int enic_dev_add_addr(struct enic *enic, u8 *addr); int enic_dev_del_addr(struct enic *enic, u8 *addr); -int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); +int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); 
int enic_dev_notify_unset(struct enic *enic); int enic_dev_hang_notify(struct enic *enic); int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index bde26d4d52e..b4133318491 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -902,7 +902,7 @@ set_vlan_promisc: return status; } -static int be_vlan_add_vid(struct net_device *netdev, u16 vid) +static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; @@ -928,7 +928,7 @@ ret: return status; } -static int be_vlan_rem_vid(struct net_device *netdev, u16 vid) +static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 9c9fa745ff8..d1812aacbc7 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2110,7 +2110,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int ehea_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; @@ -2148,7 +2148,7 @@ out: return err; } -static int ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int ehea_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 8d0d0d420c2..8239dafdd8e 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -166,8 +166,10 @@ static void e1000_vlan_mode(struct net_device *netdev, netdev_features_t features); static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, bool filter_on); -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid); +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid); static void e1000_restore_vlan(struct e1000_adapter *adapter); #ifdef CONFIG_PM @@ -333,7 +335,7 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) if (!test_bit(vid, adapter->active_vlans)) { if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) { - e1000_vlan_rx_add_vid(netdev, vid); + e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; @@ -341,7 +343,8 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid) && !test_bit(old_vid, adapter->active_vlans)) - e1000_vlan_rx_kill_vid(netdev, old_vid); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + old_vid); } else { adapter->mng_vlan_id = vid; } @@ -1457,7 +1460,8 @@ static int e1000_close(struct net_device *netdev) if ((hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && !test_bit(adapter->mng_vlan_id, 
adapter->active_vlans)) { - e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); } return 0; @@ -4837,7 +4841,8 @@ static void e1000_vlan_mode(struct net_device *netdev, e1000_irq_enable(adapter); } -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -4862,7 +4867,8 @@ static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -4896,7 +4902,7 @@ static void e1000_restore_vlan(struct e1000_adapter *adapter) e1000_vlan_filter_on_off(adapter, true); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - e1000_vlan_rx_add_vid(adapter->netdev, vid); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a2e7db33bf9..8c17f01a155 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2672,7 +2672,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight) return work_done; } -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -2697,7 +2698,8 @@ static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -2741,7 +2743,8 @@ static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter) ew32(RCTL, rctl); if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) { - e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; } } @@ -2802,22 +2805,22 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) u16 old_vid = adapter->mng_vlan_id; if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { - e1000_vlan_rx_add_vid(netdev, vid); + e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); adapter->mng_vlan_id = vid; } if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid)) - e1000_vlan_rx_kill_vid(netdev, old_vid); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), old_vid); } static void e1000_restore_vlan(struct e1000_adapter *adapter) { u16 vid; - e1000_vlan_rx_add_vid(adapter->netdev, 0); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - e1000_vlan_rx_add_vid(adapter->netdev, vid); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } static void e1000_init_manageability_pt(struct e1000_adapter *adapter) @@ -4384,7 +4387,8 @@ static int e1000_close(struct net_device *netdev) * the same ID is 
registered on the host OS (let 8021q kill it) */ if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) - e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); /* If AMT is enabled, let the firmware know that the network * interface is now closed diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b0b1777c0af..d13ea71c7c1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -159,8 +159,8 @@ static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); -static int igb_vlan_rx_add_vid(struct net_device *, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, u16); +static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16); static void igb_restore_vlan(struct igb_adapter *); static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); static void igb_ping_all_vfs(struct igb_adapter *); @@ -6976,7 +6976,8 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) igb_rlpml_set(adapter); } -static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6993,7 +6994,8 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -7019,7 +7021,7 @@ static void igb_restore_vlan(struct igb_adapter *adapter) igb_vlan_mode(adapter->netdev, adapter->netdev->features); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - igb_vlan_rx_add_vid(adapter->netdev, vid); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 33e7b3069fb..3854fd698b8 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1230,7 +1230,8 @@ static void igbvf_set_rlpml(struct igbvf_adapter *adapter) e1000_rlpml_set_vf(hw, max_frame_size); } -static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int igbvf_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -1243,7 +1244,8 @@ static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -1262,7 +1264,7 @@ static void igbvf_restore_vlan(struct igbvf_adapter *adapter) u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - igbvf_vlan_rx_add_vid(adapter->netdev, vid); + igbvf_vlan_rx_add_vid(adapter->netdev, 
htons(ETH_P_8021Q), vid); } /** diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index e65d9e91022..a32f274acd3 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -101,8 +101,10 @@ static void ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter); -static int ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +static int ixgb_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid); +static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid); static void ixgb_restore_vlan(struct ixgb_adapter *adapter); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2209,7 +2211,7 @@ ixgb_vlan_strip_disable(struct ixgb_adapter *adapter) } static int -ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +ixgb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ixgb_adapter *adapter = netdev_priv(netdev); u32 vfta, index; @@ -2226,7 +2228,7 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) } static int -ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +ixgb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ixgb_adapter *adapter = netdev_priv(netdev); u32 vfta, index; @@ -2248,7 +2250,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter) u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgb_vlan_rx_add_vid(adapter->netdev, vid); + ixgb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0316b65dfe0..3becffc7732 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3467,7 +3467,8 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) hw->mac.ops.enable_rx_dma(hw, rxctrl); } -static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3479,7 +3480,8 @@ static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3584,10 +3586,10 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) { u16 vid; - ixgbe_vlan_rx_add_vid(adapter->netdev, 0); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgbe_vlan_rx_add_vid(adapter->netdev, vid); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } /** diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8f907b7af31..4bc1f84c935 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1179,7 +1179,8 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) } } -static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) 
+static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -1204,7 +1205,8 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return err; } -static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -1227,7 +1229,8 @@ static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter) u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgbevf_vlan_rx_add_vid(adapter->netdev, vid); + ixgbevf_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); } static int ixgbevf_write_uc_addr_list(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b2ba39c7143..e7e27842d8d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -356,7 +356,8 @@ static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) } #endif -static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -381,7 +382,8 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index e9e58aadf87..a9396142201 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3300,12 +3300,13 @@ static void vxge_tx_watchdog(struct net_device *dev) /** * vxge_vlan_rx_add_vid * @dev: net device pointer. + * @proto: vlan protocol * @vid: vid * * Add the vlan id to the devices vlan id table */ static int -vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +vxge_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vxgedev *vdev = netdev_priv(dev); struct vxge_vpath *vpath; @@ -3323,14 +3324,15 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) } /** - * vxge_vlan_rx_add_vid + * vxge_vlan_rx_kill_vid * @dev: net device pointer. 
+ * @proto: vlan protocol * @vid: vid * * Remove the vlan id from the device's vlan id table */ static int -vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +vxge_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vxgedev *vdev = netdev_priv(dev); struct vxge_vpath *vpath; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 845ba1d1c3c..e88e01312c6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -86,8 +86,8 @@ static void qlcnic_dev_set_npar_ready(struct qlcnic_adapter *); static int qlcnicvf_start_firmware(struct qlcnic_adapter *); static void qlcnic_set_netdev_features(struct qlcnic_adapter *, struct qlcnic_esw_func_cfg *); -static int qlcnic_vlan_rx_add(struct net_device *, u16); -static int qlcnic_vlan_rx_del(struct net_device *, u16); +static int qlcnic_vlan_rx_add(struct net_device *, __be16, u16); +static int qlcnic_vlan_rx_del(struct net_device *, __be16, u16); #define QLCNIC_IS_TSO_CAPABLE(adapter) \ ((adapter)->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO) @@ -902,7 +902,7 @@ void qlcnic_set_vlan_config(struct qlcnic_adapter *adapter, } static int -qlcnic_vlan_rx_add(struct net_device *netdev, u16 vid) +qlcnic_vlan_rx_add(struct net_device *netdev, __be16 proto, u16 vid) { struct qlcnic_adapter *adapter = netdev_priv(netdev); set_bit(vid, adapter->vlans); @@ -910,7 +910,7 @@ qlcnic_vlan_rx_add(struct net_device *netdev, u16 vid) } static int -qlcnic_vlan_rx_del(struct net_device *netdev, u16 vid) +qlcnic_vlan_rx_del(struct net_device *netdev, __be16 proto, u16 vid) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 8e3f43c7566..a9016acc2d6 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2326,7 +2326,7 @@ static int __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid) return err; } -static int qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid) +static int qlge_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct ql_adapter *qdev = netdev_priv(ndev); int status; @@ -2357,7 +2357,7 @@ static int __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid) return err; } -static int qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid) +static int qlge_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct ql_adapter *qdev = netdev_priv(ndev); int status; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3d4a1ed0a7a..b8e52cd1a69 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2448,7 +2448,8 @@ static int sh_eth_get_vtag_index(struct sh_eth_private *mdp) return TSU_VTAG1; } -static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, u16 vid) +static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, + __be16 proto, u16 vid) { struct sh_eth_private *mdp = netdev_priv(ndev); int vtag_reg_index = sh_eth_get_vtag_index(mdp); @@ -2478,7 +2479,8 @@ static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, u16 vid) return 0; } -static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev, u16 vid) +static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev, + __be16 proto, u16 vid) { struct sh_eth_private *mdp = netdev_priv(ndev); int vtag_reg_index = sh_eth_get_vtag_index(mdp); diff --git 
a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 5ca4b33fc4c..f7050c57351 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -733,7 +733,7 @@ static void __bdx_vlan_rx_vid(struct net_device *ndev, uint16_t vid, int enable) * @ndev: network device * @vid: VLAN vid to add */ -static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid) +static int bdx_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { __bdx_vlan_rx_vid(ndev, vid, 1); return 0; @@ -744,7 +744,7 @@ static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid) * @ndev: network device * @vid: VLAN vid to kill */ -static int bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid) +static int bdx_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { __bdx_vlan_rx_vid(ndev, vid, 0); return 0; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 084992981ce..5cf8d03b8ca 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1251,7 +1251,7 @@ clean_vid: } static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, - unsigned short vid) + __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); @@ -1263,7 +1263,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, } static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, - unsigned short vid) + __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); int ret; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 37b02c3768b..c6014916f62 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -508,8 +508,10 @@ static struct rtnl_link_stats64 *rhine_get_stats64(struct net_device *dev, static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct ethtool_ops netdev_ethtool_ops; static int rhine_close(struct net_device *dev); -static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); -static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); +static int rhine_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid); +static int rhine_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid); static void rhine_restart_tx(struct net_device *dev); static void rhine_wait_bit(struct rhine_private *rp, u8 reg, u8 mask, bool low) @@ -1415,7 +1417,7 @@ static void rhine_update_vcam(struct net_device *dev) rhine_set_vlan_cam_mask(ioaddr, vCAMmask); } -static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int rhine_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct rhine_private *rp = netdev_priv(dev); @@ -1426,7 +1428,7 @@ static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int rhine_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct rhine_private *rp = netdev_priv(dev); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index c1c55a7da94..91cd59146c2 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -525,7 +525,8 @@ static void velocity_init_cam_filter(struct velocity_info *vptr) mac_set_vlan_cam_mask(regs, vptr->vCAMmask); } -static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) 
+static int velocity_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct velocity_info *vptr = netdev_priv(dev); @@ -536,7 +537,8 @@ static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int velocity_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct velocity_info *vptr = netdev_priv(dev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index fedd34cff91..7347cdbe736 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -567,21 +567,21 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, } static int macvlan_vlan_rx_add_vid(struct net_device *dev, - unsigned short vid) + __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - return vlan_vid_add(lowerdev, vid); + return vlan_vid_add(lowerdev, proto, vid); } static int macvlan_vlan_rx_kill_vid(struct net_device *dev, - unsigned short vid) + __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - vlan_vid_del(lowerdev, vid); + vlan_vid_del(lowerdev, proto, vid); return 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9290eb23d66..7c43261975b 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1598,7 +1598,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) return stats; } -static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) +static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct team *team = netdev_priv(dev); struct team_port *port; @@ -1610,7 +1610,7 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) */ mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = vlan_vid_add(port->dev, vid); + err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } @@ -1620,20 +1620,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) - vlan_vid_del(port->dev, vid); + vlan_vid_del(port->dev, proto, vid); mutex_unlock(&team->lock); return err; } -static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid) +static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct team *team = netdev_priv(dev); struct team_port *port; rcu_read_lock(); list_for_each_entry_rcu(port, &team->port_list, list) - vlan_vid_del(port->dev, vid); + vlan_vid_del(port->dev, proto, vid); rcu_read_unlock(); return 0; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b61d57ab3c6..50077753a0e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1006,7 +1006,8 @@ static void virtnet_set_rx_mode(struct net_device *dev) kfree(buf); } -static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid) +static int virtnet_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; @@ -1019,7 +1020,8 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid) return 0; } -static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) +static int virtnet_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; diff --git 
a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ba9bdad3998..27b889992ab 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1931,7 +1931,7 @@ vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter) static int -vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -1953,7 +1953,7 @@ vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) static int -vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 90ddd823605..e68f79b3855 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -302,7 +302,8 @@ static void qeth_l2_process_vlans(struct qeth_card *card) spin_unlock_bh(&card->vlanlock); } -static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; struct qeth_vlan_vid *id; @@ -331,7 +332,8 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_vlan_vid *id, *tmpid = NULL; struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 04261bc08f2..642686846a1 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1824,7 +1824,8 @@ static void qeth_l3_free_vlan_addresses(struct qeth_card *card, rcu_read_unlock(); } -static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; @@ -1832,7 +1833,8 @@ static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; unsigned long flags; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index fee28291a82..fcb9ef82aae 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -93,8 +93,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern bool vlan_do_receive(struct sk_buff **skb); extern struct sk_buff *vlan_untag(struct sk_buff *skb); -extern int vlan_vid_add(struct net_device *dev, unsigned short vid); -extern void vlan_vid_del(struct net_device *dev, unsigned short vid); +extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); +extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); extern int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7eb7e03ee41..f8898a435dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -784,15 +784,13 @@ struct netdev_fcoe_hbainfo { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. 
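Wired into net_device_ops, the callbacks slot in exactly as before; only the prototype documented and declared in the netdevice.h hunk below changes. Continuing the hypothetical "foo" driver from the earlier sketch (the elided ndo entries are assumed, not part of this series):

        static const struct net_device_ops foo_netdev_ops = {
                /* usual .ndo_open / .ndo_stop / .ndo_start_xmit entries elided */
                .ndo_vlan_rx_add_vid    = foo_vlan_rx_add_vid,  /* (dev, __be16 proto, u16 vid) */
                .ndo_vlan_rx_kill_vid   = foo_vlan_rx_kill_vid,
        };

As the net/8021q/vlan_core.c hunks further down show, the core still invokes these callbacks only for htons(ETH_P_8021Q) and only when NETIF_F_HW_VLAN_CTAG_FILTER is set on the device, so a driver that never advertises the feature is unaffected beyond the prototype change.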
* - * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & - * NETIF_F_HW_VLAN_CTAG_FILTER) - * this function is called when a VLAN id is registered. + * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16t vid); + * If device support VLAN filtering this function is called when a + * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & - * NETIF_F_HW_VLAN_CTAG_FILTER) - * this function is called when a VLAN id is unregistered. + * If device support VLAN filtering this function is called when a + * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); * @@ -936,9 +934,9 @@ struct net_device_ops { struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); + __be16 proto, u16 vid); int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); + __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index d913feed075..447c5c93434 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -112,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) * VLAN is not 0 (leave it there for 802.1p). */ if (vlan_id) - vlan_vid_del(real_dev, vlan_id); + vlan_vid_del(real_dev, htons(ETH_P_8021Q), vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -142,7 +142,7 @@ int register_vlan_dev(struct net_device *dev) struct vlan_group *grp; int err; - err = vlan_vid_add(real_dev, vlan_id); + err = vlan_vid_add(real_dev, htons(ETH_P_8021Q), vlan_id); if (err) return err; @@ -195,7 +195,7 @@ out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: - vlan_vid_del(real_dev, vlan_id); + vlan_vid_del(real_dev, htons(ETH_P_8021Q), vlan_id); return err; } @@ -350,7 +350,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); - vlan_vid_add(dev, 0); + vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } vlan_info = rtnl_dereference(dev->vlan_info); @@ -416,7 +416,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_DOWN: if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) - vlan_vid_del(dev, 0); + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); /* Put all VLANs for this dev in the down state too. 
*/ for (i = 0; i < VLAN_N_VID; i++) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 3df29d34470..04e3b95a0d4 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -185,35 +185,37 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev) struct vlan_vid_info { struct list_head list; - unsigned short vid; + __be16 proto; + u16 vid; int refcount; }; static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, - unsigned short vid) + __be16 proto, u16 vid) { struct vlan_vid_info *vid_info; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - if (vid_info->vid == vid) + if (vid_info->proto == proto && vid_info->vid == vid) return vid_info; } return NULL; } -static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid) +static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid) { struct vlan_vid_info *vid_info; vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); if (!vid_info) return NULL; + vid_info->proto = proto; vid_info->vid = vid; return vid_info; } -static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, +static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, struct vlan_vid_info **pvid_info) { struct net_device *dev = vlan_info->real_dev; @@ -221,12 +223,13 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, struct vlan_vid_info *vid_info; int err; - vid_info = vlan_vid_info_alloc(vid); + vid_info = vlan_vid_info_alloc(proto, vid); if (!vid_info) return -ENOMEM; - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - err = ops->ndo_vlan_rx_add_vid(dev, vid); + if (proto == htons(ETH_P_8021Q) && + dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { + err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); if (err) { kfree(vid_info); return err; @@ -238,7 +241,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, return 0; } -int vlan_vid_add(struct net_device *dev, unsigned short vid) +int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -254,9 +257,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid) return -ENOMEM; vlan_info_created = true; } - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) { - err = __vlan_vid_add(vlan_info, vid, &vid_info); + err = __vlan_vid_add(vlan_info, proto, vid, &vid_info); if (err) goto out_free_vlan_info; } @@ -279,14 +282,16 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, { struct net_device *dev = vlan_info->real_dev; const struct net_device_ops *ops = dev->netdev_ops; - unsigned short vid = vid_info->vid; + __be16 proto = vid_info->proto; + u16 vid = vid_info->vid; int err; - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - err = ops->ndo_vlan_rx_kill_vid(dev, vid); + if (proto == htons(ETH_P_8021Q) && + dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { + err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); if (err) { - pr_warn("failed to kill vid %d for device %s\n", - vid, dev->name); + pr_warn("failed to kill vid %04x/%d for device %s\n", + proto, vid, dev->name); } } list_del(&vid_info->list); @@ -294,7 +299,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, vlan_info->nr_vids--; } -void vlan_vid_del(struct net_device *dev, unsigned short vid) +void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -305,7 +310,7 @@ void 
vlan_vid_del(struct net_device *dev, unsigned short vid) if (!vlan_info) return; - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) return; vid_info->refcount--; @@ -333,7 +338,7 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - err = vlan_vid_add(dev, vid_info->vid); + err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; } @@ -343,7 +348,7 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } return err; @@ -363,7 +368,7 @@ void vlan_vids_del_by_dev(struct net_device *dev, return; list_for_each_entry(vid_info, &vlan_info->vid_list, list) - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 0b3dbbec80d..c3076e2c429 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { + const struct net_device_ops *ops; struct net_bridge_port *p = NULL; struct net_bridge *br; struct net_device *dev; @@ -53,6 +54,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) br = v->parent.br; dev = br->dev; } + ops = dev->netdev_ops; if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { /* Add VLAN to the device filter if it is supported. @@ -61,7 +63,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) * that ever changes this code will allow tagged * traffic to enter the bridge. */ - err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); if (err) return err; } @@ -83,7 +86,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) out_filt: if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); return err; } @@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; + const struct net_device_ops *ops = dev->netdev_ops; if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) - dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); } clear_bit(vid, v->vlan_bitmap); -- cgit v1.2.3-70-g09d2 From 1fd9b1fc310314911f66d2f14a8e4f0ef37bf47b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Apr 2013 02:04:29 +0000 Subject: net: vlan: prepare for 802.1ad support Make the encapsulation protocol value a property of VLAN devices and change the device lookup functions to take the protocol value into account. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 8 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- include/linux/if_vlan.h | 2 +- net/8021q/vlan.c | 87 ++++++++-------------- net/8021q/vlan.h | 54 +++++++++++--- net/8021q/vlan_core.c | 10 ++- net/8021q/vlan_dev.c | 7 +- net/8021q/vlan_gvrp.c | 4 + net/8021q/vlan_mvrp.c | 4 + net/8021q/vlan_netlink.c | 9 ++- net/bridge/br_netfilter.c | 3 +- 12 files changed, 108 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 35e89e12a1f..1e79a7643f0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -782,7 +782,7 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) /* rejoin all groups on vlan devices */ list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - vlan_dev = __vlan_find_dev_deep(bond_dev, + vlan_dev = __vlan_find_dev_deep(bond_dev, htons(ETH_P_8021Q), vlan->vlan_id); if (vlan_dev) __bond_resend_igmp_join_requests(vlan_dev); @@ -2512,7 +2512,8 @@ static int bond_has_this_ip(struct bonding *bond, __be32 ip) list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { rcu_read_lock(); - vlan_dev = __vlan_find_dev_deep(bond->dev, vlan->vlan_id); + vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q), + vlan->vlan_id); rcu_read_unlock(); if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip)) return 1; @@ -2541,7 +2542,7 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ return; } if (vlan_id) { - skb = vlan_put_tag(skb, vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (!skb) { pr_err("failed to insert VLAN tag\n"); return; @@ -2603,6 +2604,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { rcu_read_lock(); vlan_dev = __vlan_find_dev_deep(bond->dev, + htons(ETH_P_8021Q), vlan->vlan_id); rcu_read_unlock(); if (vlan_dev == rt->dst.dev) { diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 4232767862b..0c96e5fe99c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter, if (!memcmp(dev->dev_addr, mac, ETH_ALEN)) { rcu_read_lock(); if (vlan && vlan != VLAN_VID_MASK) { - dev = __vlan_find_dev_deep(dev, vlan); + dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan); } else if (netif_is_bond_slave(dev)) { struct net_device *upper_dev; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index e88e01312c6..d132765f92a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -3346,7 +3346,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event) rcu_read_lock(); for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) { - dev = __vlan_find_dev_deep(netdev, vid); + dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid); if (!dev) continue; qlcnic_config_indev_addr(adapter, dev, event); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index fcb9ef82aae..2c9fb65f826 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -86,7 +86,7 @@ static inline int is_vlan_dev(struct net_device *dev) #if defined(CONFIG_VLAN_8021Q) 
|| defined(CONFIG_VLAN_8021Q_MODULE) extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, - u16 vlan_id); + __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 447c5c93434..9424f3718ea 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. */ -static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) +static int vlan_group_prealloc_vid(struct vlan_group *vg, + __be16 vlan_proto, u16 vlan_id) { struct net_device **array; + unsigned int pidx, vidx; unsigned int size; ASSERT_RTNL(); - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + pidx = vlan_proto_idx(vlan_proto); + vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN; + array = vg->vlan_devices_arrays[pidx][vidx]; if (array != NULL) return 0; @@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) if (array == NULL) return -ENOBUFS; - vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array; + vg->vlan_devices_arrays[pidx][vidx] = array; return 0; } @@ -93,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); - vlan_group_set_device(grp, vlan_id, NULL); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. @@ -112,13 +116,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) * VLAN is not 0 (leave it there for 802.1p). */ if (vlan_id) - vlan_vid_del(real_dev, htons(ETH_P_8021Q), vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id) { const char *name = real_dev->name; @@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) return -EOPNOTSUPP; } - if (vlan_find_dev(real_dev, vlan_id) != NULL) + if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) return -EEXIST; return 0; @@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev) struct vlan_group *grp; int err; - err = vlan_vid_add(real_dev, htons(ETH_P_8021Q), vlan_id); + err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id); if (err) return err; @@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev) goto out_uninit_gvrp; } - err = vlan_group_prealloc_vid(grp, vlan_id); + err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id); if (err < 0) goto out_uninit_mvrp; @@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev) /* So, got the sucker initialized, now lets place * it into our local structure. 
*/ - vlan_group_set_device(grp, vlan_id, dev); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; return 0; @@ -195,7 +200,7 @@ out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: - vlan_vid_del(real_dev, htons(ETH_P_8021Q), vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); return err; } @@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) if (vlan_id >= VLAN_VID_MASK) return -ERANGE; - err = vlan_check_real_dev(real_dev, vlan_id); + err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id); if (err < 0) return err; @@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) new_dev->mtu = real_dev->mtu; new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT); + vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q); vlan_dev_priv(new_dev)->vlan_id = vlan_id; vlan_dev_priv(new_dev)->real_dev = real_dev; vlan_dev_priv(new_dev)->dent = NULL; @@ -341,6 +347,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; struct vlan_dev_priv *vlan; + bool last = false; LIST_HEAD(list); if (is_vlan_dev(dev)) @@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) netif_stacked_transfer_operstate(dev, vlandev); - } break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_CHANGEMTU: - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { if (vlandev->mtu <= dev->mtu) continue; @@ -404,14 +398,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_FEAT_CHANGE: /* Propagate device features to underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) vlan_transfer_features(dev, vlandev); - } - break; case NETDEV_DOWN: @@ -419,11 +407,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan_vid_del(dev, htons(ETH_P_8021Q), 0); /* Put all VLANs for this dev in the down state too. */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UP: /* Put all VLANs for this dev in the up state too. 
*/ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (flgs & IFF_UP) continue; @@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (dev->reg_state != NETREG_UNREGISTERING) break; - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { /* removal of last vid destroys vlan_info, abort * afterwards */ if (vlan_info->nr_vids == 1) - i = VLAN_N_VID; + last = true; unregister_vlan_dev(vlandev, &list); + if (last) + break; } unregister_netdevice_many(&list); break; @@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: /* Propagate to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) call_netdevice_notifiers(event, vlandev); - } break; } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 670f1e8cfc0..245de9653db 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -49,6 +49,7 @@ struct netpoll; * @ingress_priority_map: ingress priority mappings * @nr_egress_mappings: number of egress priority mappings * @egress_priority_map: hash of egress priority mappings + * @vlan_proto: VLAN encapsulation protocol * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice @@ -62,6 +63,7 @@ struct vlan_dev_priv { unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; + __be16 vlan_proto; u16 vlan_id; u16 flags; @@ -87,10 +89,16 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) +enum vlan_protos { + VLAN_PROTO_8021Q = 0, + VLAN_PROTO_NUM, +}; + struct vlan_group { unsigned int nr_vlan_devs; struct hlist_node hlist; /* linked list */ - struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; + struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM] + [VLAN_GROUP_ARRAY_SPLIT_PARTS]; }; struct vlan_info { @@ -103,37 +111,64 @@ struct vlan_info { struct rcu_head rcu; }; -static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, - u16 vlan_id) +static inline unsigned int vlan_proto_idx(__be16 proto) +{ + switch (proto) { + case __constant_htons(ETH_P_8021Q): + return VLAN_PROTO_8021Q; + default: + BUG(); + } +} + +static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg, + unsigned int pidx, + u16 vlan_id) { struct net_device **array; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + + array = vg->vlan_devices_arrays[pidx] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; return array ? 
array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; } +static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, + __be16 vlan_proto, + u16 vlan_id) +{ + return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id); +} + static inline void vlan_group_set_device(struct vlan_group *vg, - u16 vlan_id, + __be16 vlan_proto, u16 vlan_id, struct net_device *dev) { struct net_device **array; if (!vg) return; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } /* Must be invoked with rcu_read_lock or with RTNL. */ static inline struct net_device *vlan_find_dev(struct net_device *real_dev, - u16 vlan_id) + __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); if (vlan_info) - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); return NULL; } +#define vlan_group_for_each_dev(grp, i, dev) \ + for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \ + if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \ + (i) % VLAN_N_VID))) + /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); @@ -142,7 +177,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 04e3b95a0d4..4e4c360353e 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -12,7 +12,7 @@ bool vlan_do_receive(struct sk_buff **skbp) struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; - vlan_dev = vlan_find_dev(skb->dev, vlan_id); + vlan_dev = vlan_find_dev(skb->dev, htons(ETH_P_8021Q), vlan_id); if (!vlan_dev) return false; @@ -62,12 +62,13 @@ bool vlan_do_receive(struct sk_buff **skbp) /* Must be invoked with rcu_read_lock. 
*/ struct net_device *__vlan_find_dev_deep(struct net_device *dev, - u16 vlan_id) + __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); } else { /* * Lower devices of master uppers (bonding, team) do not have @@ -78,7 +79,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, vlan_id); + return __vlan_find_dev_deep(upper_dev, + vlan_proto, vlan_id); } return NULL; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5c4892a8641..d7457b7e1b9 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const void *daddr, const void *saddr, unsigned int len) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_hdr *vhdr; unsigned int vhdrlen = 0; u16 vlan_tci = 0; @@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, else vhdr->h_vlan_encapsulated_proto = htons(len); - skb->protocol = htons(ETH_P_8021Q); - type = ETH_P_8021Q; + skb->protocol = vlan->vlan_proto; + type = ntohs(vlan->vlan_proto); vhdrlen = VLAN_HLEN; } @@ -161,7 +162,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ - if (veth->h_vlan_proto != htons(ETH_P_8021Q) || + if (veth->h_vlan_proto != vlan->vlan_proto || vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c index 6f975535276..66a80320b03 100644 --- a/net/8021q/vlan_gvrp.c +++ b/net/8021q/vlan_gvrp.c @@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; return garp_request_join(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } @@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; garp_request_leave(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c index d9ec1d5964a..e0fe091801b 100644 --- a/net/8021q/vlan_mvrp.c +++ b/net/8021q/vlan_mvrp.c @@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; return mrp_request_join(vlan->real_dev, &vlan_mrp_app, &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); } @@ -47,6 +49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; mrp_request_leave(vlan->real_dev, &vlan_mrp_app, &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 1789658b7cd..a1a956ab39a 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ 
-118,11 +118,12 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (!real_dev) return -ENODEV; - vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); - vlan->real_dev = real_dev; - vlan->flags = VLAN_FLAG_REORDER_HDR; + vlan->vlan_proto = htons(ETH_P_8021Q); + vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); + vlan->real_dev = real_dev; + vlan->flags = VLAN_FLAG_REORDER_HDR; - err = vlan_check_real_dev(real_dev, vlan->vlan_id); + err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); if (err < 0) return err; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fe43bc7b063..bd61bf5159c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); + vlan = __vlan_find_dev_deep(br, htons(ETH_P_8021Q), + vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } -- cgit v1.2.3-70-g09d2 From 86a9bad3ab6b6f858fd4443b48738cabbb6d094c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Apr 2013 02:04:30 +0000 Subject: net: vlan: add protocol argument to packet tagging functions Add a protocol argument to the VLAN packet tagging functions. In case of HW tagging, we need that protocol available in the ndo_start_xmit functions, so it is stored in a new field in the skb. The new field fits into a hole (on 64 bit) and doesn't increase the sks's size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/infiniband/hw/nes/nes_hw.c | 2 +- drivers/net/bonding/bond_alb.c | 4 +-- drivers/net/ethernet/3com/typhoon.c | 2 +- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 +-- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bnad.c | 2 +- drivers/net/ethernet/chelsio/cxgb/sge.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/sge.c | 4 +-- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 +-- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 5 ++-- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++--- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 4 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- drivers/net/ethernet/nvidia/forcedeth.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 +++--- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 8 +++--- drivers/net/ethernet/realtek/8139cp.c | 2 +- drivers/net/ethernet/realtek/r8169.c | 2 +- 
drivers/net/ethernet/tehuti/tehuti.c | 2 +- drivers/net/ethernet/via/via-rhine.c | 2 +- drivers/net/ethernet/via/via-velocity.c | 2 +- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- include/linux/if_vlan.h | 33 ++++++++++++++++------- include/linux/skbuff.h | 2 ++ net/8021q/vlan_core.c | 8 +++--- net/8021q/vlan_dev.c | 2 +- net/batman-adv/bridge_loop_avoidance.c | 2 +- net/bridge/br_netfilter.c | 2 +- net/bridge/br_vlan.c | 4 +-- net/core/dev.c | 5 ++-- net/core/netpoll.c | 5 ++-- net/core/skbuff.c | 1 + net/openvswitch/actions.c | 6 ++--- net/openvswitch/datapath.c | 2 +- 56 files changed, 107 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 67647e26461..418004c93fe 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2948,7 +2948,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", nesvnic->netdev->name, vlan_tag); - __vlan_hwaccel_put_tag(rx_skb, vlan_tag); + __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); } if (nes_use_lro) lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index f5e05272302..e02cc265723 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -514,7 +514,7 @@ static void rlb_update_client(struct rlb_client_info *client_info) skb->dev = client_info->slave->dev; if (client_info->tag) { - skb = vlan_put_tag(skb, client_info->vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", client_info->slave->bond->dev->name); @@ -1014,7 +1014,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) continue; } - skb = vlan_put_tag(skb, vlan->vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", bond->dev->name); diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 839a9621374..144942f6372 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -1690,7 +1690,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read skb_checksum_none_assert(new_skb); if (rx->rxStatus & TYPHOON_RX_VLAN) - __vlan_hwaccel_put_tag(new_skb, + __vlan_hwaccel_put_tag(new_skb, htons(ETH_P_8021Q), ntohl(rx->vlanTag) & 0xffff); netif_receive_skb(new_skb); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index cdbc5443ae3..8b04bfc20cf 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1498,7 +1498,7 @@ static int __netdev_rx(struct net_device *dev, int *quota) printk(KERN_DEBUG " netdev_rx() vlanid = %d\n", vlid); } - __vlan_hwaccel_put_tag(skb, vlid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlid); } #endif /* VLAN_SUPPORT */ netif_receive_skb(skb); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index a7689d93105..b7894f8af9d 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2019,7 +2019,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) /* send it up */ if ((bd_flags & BD_FLG_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, 
retdesc->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), retdesc->vlan); netif_rx(skb); dev->stats.rx_packets++; diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 6bad84d6e2c..8e6b665a672 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -793,7 +793,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) #if AMD8111E_VLAN_TAG_USED if (vtag == TT_VLAN_TAGGED){ u16 vlan_tag = le16_to_cpu(lp->rx_ring[rx_index].tag_ctrl_info); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } #endif netif_receive_skb(skb); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 3565255cdd8..0ba900762b1 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1809,7 +1809,7 @@ rrs_checked: AT_TAG_TO_VLAN(rrs->vlan_tag, vlan); vlan = le16_to_cpu(vlan); - __vlan_hwaccel_put_tag(skb, vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 598a6115166..0688bb82b44 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1435,7 +1435,7 @@ static void atl1e_clean_rx_irq(struct atl1e_adapter *adapter, u8 que, netdev_dbg(netdev, "RXD VLAN TAG=0x%04x\n", prrs->vtag); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index fd7d85044e4..fa0915f3999 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2024,7 +2024,7 @@ rrd_ok: ((rrd->vlan_tag & 7) << 13) | ((rrd->vlan_tag & 8) << 9); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 6b2c08a89b7..265ce1b752e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -452,7 +452,7 @@ static void atl2_intr_rx(struct atl2_adapter *adapter) ((rxd->status.vtag&7) << 13) | ((rxd->status.vtag&8) << 9); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_rx(skb); netdev->stats.rx_bytes += rx_size; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 42a8bc8df5d..5d204492c60 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3211,7 +3211,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) } if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) && !(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, rx_hdr->l2_fhdr_vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rx_hdr->l2_fhdr_vlan_tag); skb->protocol = eth_type_trans(skb, bp->dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 352e58ede4d..6b50443d345 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -719,7 +719,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct 
bnx2x_fastpath *fp, if (!bnx2x_fill_frag_skb(bp, fp, tpa_info, pages, skb, cqe, cqe_idx)) { if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN) - __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tpa_info->vlan_tag); bnx2x_gro_receive(bp, fp, skb); } else { DP(NETIF_MSG_RX_STATUS, @@ -994,7 +994,7 @@ reuse_rx: if (le16_to_cpu(cqe_fp->pars_flags.flags) & PARSING_FLAGS_VLAN) - __vlan_hwaccel_put_tag(skb, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); napi_gro_receive(&fp->napi, skb); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 0c22c9a059c..ac83c87e0b1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6715,7 +6715,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) if (desc->type_flags & RXD_FLAG_VLAN && !(tp->rx_mode & RX_MODE_KEEP_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), desc->err_vlan & RXD_VLAN_MASK); napi_gro_receive(&tnapi->napi, skb); diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index c0bc44e51fa..ce4a030d3d0 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -610,7 +610,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) rcb->rxq->rx_bytes += length; if (flags & BNA_CQ_EF_VLAN) - __vlan_hwaccel_put_tag(skb, ntohs(cmpl->vlan_tag)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cmpl->vlan_tag)); if (BNAD_RXBUF_IS_PAGE(unmap_q->type)) napi_gro_frags(&rx_ctrl->napi); diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index f85e0659432..8061fb0ef7e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1386,7 +1386,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) if (p->vlan_valid) { st->vlan_xtract++; - __vlan_hwaccel_put_tag(skb, ntohs(p->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); } netif_receive_skb(skb); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 9d67eb794c4..f12e6b85a65 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2030,7 +2030,7 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, if (p->vlan_valid) { qs->port_stats[SGE_PSTAT_VLANEX]++; - __vlan_hwaccel_put_tag(skb, ntohs(p->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); } if (rq->polling) { if (lro) @@ -2132,7 +2132,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, if (cpl->vlan_valid) { qs->port_stats[SGE_PSTAT_VLANEX]++; - __vlan_hwaccel_put_tag(skb, ntohs(cpl->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); } napi_gro_frags(&qs->napi); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 8b47b253e20..2bfbb206b35 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1633,7 +1633,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->rxhash = (__force u32)pkt->rsshdr.hash_val; if (unlikely(pkt->vlan_ex)) { - __vlan_hwaccel_put_tag(skb, ntohs(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); rxq->stats.vlan_ex++; } ret = napi_gro_frags(&rxq->rspq.napi); @@ 
-1705,7 +1705,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { - __vlan_hwaccel_put_tag(skb, ntohs(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); rxq->stats.vlan_ex++; } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 61dfb2a4792..df296af20bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1482,7 +1482,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb_record_rx_queue(skb, rxq->rspq.idx); if (pkt->vlan_ex) { - __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q), + be16_to_cpu(pkt->vlan)); rxq->stats.vlan_ex++; } ret = napi_gro_frags(&rxq->rspq.napi); @@ -1551,7 +1552,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, if (pkt->vlan_ex) { rxq->stats.vlan_ex++; - __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(pkt->vlan)); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 05c1e59b6bf..635f55992d7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1300,7 +1300,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, } if (vlan_stripped) - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); if (netdev->features & NETIF_F_GRO) napi_gro_receive(&enic->napi[q_number], skb); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b4133318491..811d0a47d17 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -771,7 +771,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (vlan_tx_tag_present(skb)) { vlan_tag = be_get_tx_vlan_tag(adapter, skb); - __vlan_put_tag(skb, vlan_tag); + __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); skb->vlan_tci = 0; } @@ -1383,7 +1383,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo, if (rxcp->vlanf) - __vlan_hwaccel_put_tag(skb, rxcp->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); netif_receive_skb(skb); } @@ -1439,7 +1439,7 @@ void be_rx_compl_process_gro(struct be_rx_obj *rxo, struct napi_struct *napi, skb->rxhash = rxcp->rss_hash; if (rxcp->vlanf) - __vlan_hwaccel_put_tag(skb, rxcp->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); napi_gro_frags(napi); } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 8239dafdd8e..59ad007dd5a 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -4003,7 +4003,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status, if (status & E1000_RXD_STAT_VP) { u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&adapter->napi, skb); } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 8c17f01a155..da7f2fad5ba 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -554,7 
+554,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (staterr & E1000_RXD_STAT_VP) - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); napi_gro_receive(&adapter->napi, skb); } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d13ea71c7c1..9bf08b977da 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6683,7 +6683,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, else vid = le16_to_cpu(rx_desc->wb.upper.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } skb_record_rx_queue(skb, rx_ring->queue_index); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 3854fd698b8..93eb7ee06d3 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -116,7 +116,7 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter, else vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; if (test_bit(vid, adapter->active_vlans)) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&adapter->rx_ring->napi, skb); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index a32f274acd3..fce3e92f9d1 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -2082,8 +2082,8 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do) skb->protocol = eth_type_trans(skb, netdev); if (status & IXGB_RX_DESC_STATUS_VP) - __vlan_hwaccel_put_tag(skb, - le16_to_cpu(rx_desc->special)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rx_desc->special)); netif_receive_skb(skb); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3becffc7732..6225f880a3f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1491,7 +1491,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } skb_record_rx_queue(skb, rx_ring->queue_index); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4bc1f84c935..1f5166ad6bb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -291,7 +291,7 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan); if (is_vlan && test_bit(tag & VLAN_VID_MASK, adapter->active_vlans)) - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) napi_gro_receive(&q_vector->napi, skb); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index d28ce6f9717..070a6f1a057 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1059,7 +1059,7 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) if (rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_TAGON)) { u16 vid = 
le16_to_cpu(rxdesc->descwb.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); NET_STAT(jme).rx_bytes += 4; } jme->jme_rx(skb); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index bf9da1b7b09..256ae789c14 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2713,7 +2713,7 @@ static void sky2_rx_tag(struct sky2_port *sky2, u16 length) struct sk_buff *skb; skb = sky2->rx_ring[sky2->rx_next].skb; - __vlan_hwaccel_put_tag(skb, be16_to_cpu(length)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(length)); } static void sky2_rx_hash(struct sky2_port *sky2, u32 status) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c7f856308e1..4006f8857cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -673,7 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) { u16 vid = be16_to_cpu(cqe->sl_vid); - __vlan_hwaccel_put_tag(gro_skb, vid); + __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); } if (dev->features & NETIF_F_RXHASH) @@ -716,7 +716,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) - __vlan_hwaccel_put_tag(skb, be16_to_cpu(cqe->sl_vid)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); /* Push it up the stack */ netif_receive_skb(skb); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 46262ea610f..7be9788ed0f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1290,7 +1290,7 @@ myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb) skb->csum = csum_sub(skb->csum, vsum); } /* pop tag */ - __vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(veh->h_vlan_TCI)); memmove(va + VLAN_HLEN, va, 2 * ETH_ALEN); skb->len -= VLAN_HLEN; skb->data_len -= VLAN_HLEN; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 60267e91cbd..d3b47003a57 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -911,7 +911,7 @@ static void rx_irq(struct net_device *ndev) unsigned short tag; tag = ntohs(extsts & EXTSTS_VTG_MASK); - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_IPV6), tag); } #endif rx_rc = netif_rx(skb); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index ec82d59b03e..51b00941302 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8555,7 +8555,7 @@ static void queue_rx_frame(struct sk_buff *skb, u16 vlan_tag) skb->protocol = eth_type_trans(skb, dev); if (vlan_tag && sp->vlan_strip_flag) - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); if (sp->config.napi) netif_receive_skb(skb); else diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index a9396142201..cbfaed5f2f8 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -312,7 +312,7 @@ vxge_rx_complete(struct 
vxge_ring *ring, struct sk_buff *skb, u16 vlan, if (ext_info->vlan && ring->vlan_tag_strip == VXGE_HW_VPATH_RPA_STRIP_VLAN_TAG_ENABLE) - __vlan_hwaccel_put_tag(skb, ext_info->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ext_info->vlan); napi_gro_receive(ring->napi_p, skb); vxge_debug_entryexit(VXGE_TRACE, diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index fcad64081d7..b003fe53c8e 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2969,7 +2969,7 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index a85ca63a2c9..56223a6aa40 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1050,7 +1050,7 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); napi_gro_receive(&sds_ring->napi, skb); @@ -1153,7 +1153,7 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, } if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); netif_receive_skb(skb); adapter->stats.lro_pkts++; @@ -1518,7 +1518,7 @@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); napi_gro_receive(&sds_ring->napi, skb); @@ -1615,7 +1615,7 @@ qlcnic_83xx_process_lro(struct qlcnic_adapter *adapter, } if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); netif_receive_skb(skb); diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index a9016acc2d6..44cf72ac248 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1498,7 +1498,7 @@ static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev, skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); napi_gro_frags(napi); } @@ -1574,7 +1574,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(napi, skb); else @@ -1670,7 +1670,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(&rx_ring->napi, skb); else @@ -1975,7 +1975,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, rx_ring->rx_bytes += skb->len; skb_record_rx_queue(skb, rx_ring->cq_id); if ((ib_mac_rsp->flags2 & 
IB_MAC_IOCB_RSP_V) && (vlan_id != 0)) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(&rx_ring->napi, skb); else diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 6d03b52e56f..7d1fb9ad129 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -431,7 +431,7 @@ static inline void cp_rx_skb (struct cp_private *cp, struct sk_buff *skb, cp->dev->stats.rx_bytes += skb->len; if (opts2 & RxVlanTagged) - __vlan_hwaccel_put_tag(skb, swab16(opts2 & 0xffff)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff)); napi_gro_receive(&cp->napi, skb); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 86d5d7909d1..c6dac38fd9c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1843,7 +1843,7 @@ static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) u32 opts2 = le32_to_cpu(desc->opts2); if (opts2 & RxVlanTag) - __vlan_hwaccel_put_tag(skb, swab16(opts2 & 0xffff)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff)); } static int rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index f7050c57351..571452e786d 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1148,7 +1148,7 @@ NETIF_RX_MUX(struct bdx_priv *priv, u32 rxd_val1, u16 rxd_vlan, priv->ndev->name, GET_RXD_VLAN_ID(rxd_vlan), GET_RXD_VTAG(rxd_val1)); - __vlan_hwaccel_put_tag(skb, GET_RXD_VLAN_TCI(rxd_vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), GET_RXD_VLAN_TCI(rxd_vlan)); } netif_receive_skb(skb); } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index c6014916f62..ca98acabf1b 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1936,7 +1936,7 @@ static int rhine_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); if (unlikely(desc_length & DescTag)) - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); netif_receive_skb(skb); u64_stats_update_begin(&rp->rx_stats.syncp); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 91cd59146c2..fb6248956ee 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2080,7 +2080,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx) if (rd->rdesc0.RSR & RSR_DETAG) { u16 vid = swab16(le16_to_cpu(rd->rdesc1.PQTAG)); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } netif_rx(skb); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index b7e9cd7ca6d..cc6dfe4102f 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -221,7 +221,7 @@ static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_ /* map MBIM session to VLAN */ if (tci) - vlan_put_tag(skb, tci); + vlan_put_tag(skb, htons(ETH_P_8021Q), tci); err: return skb; } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 27b889992ab..55a62cae2cb 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1293,7 +1293,7 
@@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skb->protocol = eth_type_trans(skb, adapter->netdev); if (unlikely(rcd->ts)) - __vlan_hwaccel_put_tag(skb, rcd->tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci); if (adapter->netdev->features & NETIF_F_LRO) netif_receive_skb(skb); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 2c9fb65f826..8086ff9988b 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -157,9 +157,18 @@ static inline bool vlan_uses_dev(const struct net_device *dev) } #endif +static inline bool vlan_hw_offload_capable(netdev_features_t features, + __be16 proto) +{ + if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX) + return true; + return false; +} + /** * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload @@ -170,7 +179,8 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -185,7 +195,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) skb->mac_header -= VLAN_HLEN; /* first, the ethernet type */ - veth->h_vlan_proto = htons(ETH_P_8021Q); + veth->h_vlan_proto = vlan_proto; /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); @@ -204,24 +214,28 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { - skb = vlan_insert_tag(skb, vlan_tci); + skb = vlan_insert_tag(skb, vlan_proto, vlan_tci); if (skb) - skb->protocol = htons(ETH_P_8021Q); + skb->protocol = vlan_proto; return skb; } /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest */ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { + skb->vlan_proto = vlan_proto; skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; return skb; } @@ -236,12 +250,13 @@ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, * Assumes skb->dev is the target that will xmit this frame. * Returns a VLAN tagged skb. 
*/ -static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { - if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) { - return __vlan_hwaccel_put_tag(skb, vlan_tci); + if (vlan_hw_offload_capable(skb->dev->features, vlan_proto)) { + return __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); } else { - return __vlan_put_tag(skb, vlan_tci); + return __vlan_put_tag(skb, vlan_proto, vlan_tci); } } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e27d1c782f3..f5bed7b3195 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -387,6 +387,7 @@ typedef unsigned char *sk_buff_data_t; * @secmark: security marking * @mark: Generic packet mark * @dropcount: total number of sk_receive_queue overflows + * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) @@ -465,6 +466,7 @@ struct sk_buff { __u32 rxhash; + __be16 vlan_proto; __u16 vlan_tci; #ifdef CONFIG_NET_SCHED diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 4e4c360353e..bdb0b9d2e9c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -8,11 +8,12 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; + __be16 vlan_proto = skb->vlan_proto; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; - vlan_dev = vlan_find_dev(skb->dev, htons(ETH_P_8021Q), vlan_id); + vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); if (!vlan_dev) return false; @@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp) * original position later */ skb_push(skb, offset); - skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto, + skb->vlan_tci); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); @@ -127,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) vhdr = (struct vlan_hdr *) skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index d7457b7e1b9..8af508536d3 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -167,7 +167,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, u16 vlan_tci; vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, vlan_tci); + skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } skb->dev = vlan->real_dev; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6a4f728680a..379061c7254 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, } if (vid != -1) - skb = vlan_insert_tag(skb, vid); + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bd61bf5159c..1ed75bfd8d1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff 
*skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, htons(ETH_P_8021Q), + vlan = __vlan_find_dev_deep(br, skb->vlan_proto, vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index c3076e2c429..bd58b45f5f9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -175,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, * mac header. */ skb_push(skb, ETH_HLEN); - skb = __vlan_put_tag(skb, skb->vlan_tci); + skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci); if (!skb) goto out; /* put skb->data back to where it was */ @@ -217,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* PVID is set on this port. Any untagged ingress * frame is considered to belong to this vlan. */ - __vlan_hwaccel_put_tag(skb, pvid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); return true; } diff --git a/net/core/dev.c b/net/core/dev.c index 07a8e9dc43f..3a12ee132b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2482,8 +2482,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && - !(features & NETIF_F_HW_VLAN_CTAG_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8de961e67cf..209d84253dd 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -383,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { if (vlan_tx_tag_present(skb) && - !(netif_skb_features(skb) & NETIF_F_HW_VLAN_CTAG_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(netif_skb_features(skb), + skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); if (unlikely(!skb)) break; skb->vlan_tci = 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ba646145cd5..a92d9e7d10f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -707,6 +707,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_proto = old->vlan_proto; new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index d4d5363c7ba..894b6cbdd92 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb) if (unlikely(err)) return err; - __vlan_hwaccel_put_tag(skb, ntohs(tci)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci)); return 0; } @@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla /* push down current VLAN tag */ current_tag = vlan_tx_tag_get(skb); - if (!__vlan_put_tag(skb, current_tag)) + if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla + (2 * ETH_ALEN), VLAN_HLEN, 0)); } - __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); + __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); return 0; } diff --git 
a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b7d0b7c3fe2..7bb5d4f6bb9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -401,7 +401,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, if (!nskb) return -ENOMEM; - nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); + nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); if (!nskb) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 8ad227ff89a7e6f05d07cd0acfd95ed3a24450ca Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 19 Apr 2013 02:04:31 +0000 Subject: net: vlan: add 802.1ad support Add support for 802.1ad VLAN devices. This mainly consists of checking for ETH_P_8021AD in addition to ETH_P_8021Q in a couple of places and check offloading capabilities based on the used protocol. Configuration is done using "ip link": # ip link add link eth0 eth0.1000 \ type vlan proto 802.1ad id 1000 # ip link add link eth0.1000 eth0.1000.1000 \ type vlan proto 802.1q id 1000 52:54:00:12:34:56 > 92:b1:54:28:e4:8c, ethertype 802.1Q (0x8100), length 106: vlan 1000, p 0, ethertype 802.1Q, vlan 1000, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 84) 20.1.0.2 > 20.1.0.1: ICMP echo request, id 3003, seq 8, length 64 92:b1:54:28:e4:8c > 52:54:00:12:34:56, ethertype 802.1Q-QinQ (0x88a8), length 106: vlan 1000, p 0, ethertype 802.1Q, vlan 1000, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 47944, offset 0, flags [none], proto ICMP (1), length 84) 20.1.0.1 > 20.1.0.2: ICMP echo reply, id 3003, seq 8, length 64 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 ++++-- include/linux/netdev_features.h | 6 ++++++ include/uapi/linux/if_link.h | 1 + net/8021q/Kconfig | 2 +- net/8021q/vlan.h | 3 +++ net/8021q/vlan_core.c | 18 ++++++++++++++---- net/8021q/vlan_netlink.c | 25 ++++++++++++++++++++++--- net/core/dev.c | 16 ++++++++++------ 8 files changed, 61 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8086ff9988b..a78f9390da8 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -162,6 +162,8 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, { if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX) return true; + if (proto == htons(ETH_P_8021AD) && features & NETIF_F_HW_VLAN_STAG_TX) + return true; return false; } @@ -271,9 +273,9 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q)) { + if (veth->h_vlan_proto != htons(ETH_P_8021Q) && + veth->h_vlan_proto != htons(ETH_P_8021AD)) return -EINVAL; - } *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 785913b8983..cbaa027ef5a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -25,6 +25,9 @@ enum { NETIF_F_HW_VLAN_CTAG_TX_BIT, /* Transmit VLAN CTAG HW acceleration */ NETIF_F_HW_VLAN_CTAG_RX_BIT, /* Receive VLAN CTAG HW acceleration */ NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ + NETIF_F_HW_VLAN_STAG_TX_BIT, /* Transmit VLAN STAG HW acceleration */ + NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ + NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ 
NETIF_F_GSO_BIT, /* Enable software GSO. */ NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ @@ -83,6 +86,9 @@ enum { #define NETIF_F_HW_VLAN_CTAG_FILTER __NETIF_F(HW_VLAN_CTAG_FILTER) #define NETIF_F_HW_VLAN_CTAG_RX __NETIF_F(HW_VLAN_CTAG_RX) #define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) +#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) +#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) +#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LLTX __NETIF_F(LLTX) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9922704f08a..e3163544f33 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -250,6 +250,7 @@ enum { IFLA_VLAN_FLAGS, IFLA_VLAN_EGRESS_QOS, IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, __IFLA_VLAN_MAX, }; diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index 8f7517df41a..b85a91fa61f 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -3,7 +3,7 @@ # config VLAN_8021Q - tristate "802.1Q VLAN Support" + tristate "802.1Q/802.1ad VLAN Support" ---help--- Select this and you will be able to create 802.1Q VLAN interfaces on your ethernet interfaces. 802.1Q VLAN supports almost diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 245de9653db..abc9cb631c4 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -91,6 +91,7 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) enum vlan_protos { VLAN_PROTO_8021Q = 0, + VLAN_PROTO_8021AD, VLAN_PROTO_NUM, }; @@ -116,6 +117,8 @@ static inline unsigned int vlan_proto_idx(__be16 proto) switch (proto) { case __constant_htons(ETH_P_8021Q): return VLAN_PROTO_8021Q; + case __constant_htons(ETH_P_8021AD): + return VLAN_PROTO_8021AD; default: BUG(); } diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index bdb0b9d2e9c..ebfa2fceb88 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -194,6 +194,18 @@ struct vlan_vid_info { int refcount; }; +static bool vlan_hw_filter_capable(const struct net_device *dev, + const struct vlan_vid_info *vid_info) +{ + if (vid_info->proto == htons(ETH_P_8021Q) && + dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + return true; + if (vid_info->proto == htons(ETH_P_8021AD) && + dev->features & NETIF_F_HW_VLAN_STAG_FILTER) + return true; + return false; +} + static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, __be16 proto, u16 vid) { @@ -231,8 +243,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, if (!vid_info) return -ENOMEM; - if (proto == htons(ETH_P_8021Q) && - dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (vlan_hw_filter_capable(dev, vid_info)) { err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); if (err) { kfree(vid_info); @@ -290,8 +301,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, u16 vid = vid_info->vid; int err; - if (proto == htons(ETH_P_8021Q) && - dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (vlan_hw_filter_capable(dev, vid_info)) { err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); if (err) { pr_warn("failed to kill vid %04x/%d for device %s\n", diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a1a956ab39a..30912973228 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, 
[IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, + [IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { @@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (!data) return -EINVAL; + if (data[IFLA_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) { + case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } + if (data[IFLA_VLAN_ID]) { id = nla_get_u16(data[IFLA_VLAN_ID]); if (id >= VLAN_VID_MASK) @@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev; + __be16 proto; int err; if (!data[IFLA_VLAN_ID]) @@ -118,7 +130,12 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (!real_dev) return -ENODEV; - vlan->vlan_proto = htons(ETH_P_8021Q); + if (data[IFLA_VLAN_PROTOCOL]) + proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]); + else + proto = htons(ETH_P_8021Q); + + vlan->vlan_proto = proto; vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); vlan->real_dev = real_dev; vlan->flags = VLAN_FLAG_REORDER_HDR; @@ -152,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - return nla_total_size(2) + /* IFLA_VLAN_ID */ + return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ + nla_total_size(2) + /* IFLA_VLAN_ID */ sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); @@ -167,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) struct nlattr *nest; unsigned int i; - if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id)) + if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) || + nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id)) goto nla_put_failure; if (vlan->flags) { f.flags = vlan->flags; diff --git a/net/core/dev.c b/net/core/dev.c index 3a12ee132b5..fad4c385f7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2212,7 +2212,7 @@ __be16 skb_network_protocol(struct sk_buff *skb) __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; - while (type == htons(ETH_P_8021Q)) { + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) @@ -2428,20 +2428,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q)) { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { return harmonize_features(skb, protocol, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX); + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q)) { + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; return 
harmonize_features(skb, protocol, features); } } @@ -3360,6 +3362,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) case __constant_htons(ETH_P_IP): case __constant_htons(ETH_P_IPV6): case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): return true; default: return false; @@ -3400,7 +3403,8 @@ another_round: __this_cpu_inc(softnet_data.processed); - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = vlan_untag(skb); if (unlikely(!skb)) goto unlock; -- cgit v1.2.3-70-g09d2 From e32123e59871b9389d5b3fe9318611c7f1d1307a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:46:57 +0000 Subject: netlink: rename ssk to sk in struct netlink_skb_params Memory mapped netlink needs to store the receiving userspace socket when sending from the kernel to userspace. Rename 'ssk' to 'sk' to avoid confusion. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/ipv4/inet_diag.c | 6 +++--- net/ipv4/udp_diag.c | 4 ++-- net/netfilter/nfnetlink_log.c | 2 +- net/netlink/af_netlink.c | 2 +- net/sched/cls_flow.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e0f746b7b95..d8e9264ae04 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -19,7 +19,7 @@ struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ __u32 portid; __u32 dst_group; - struct sock *ssk; + struct sock *sk; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8620408af57..5f648751fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -324,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -630,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -805,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 369a781851a..7927db0a927 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return 0; return inet_sk_diag_fill(sk, NULL, skb, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -71,7 +71,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 1a0be2af1dd..50aaf716cd6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ 
-824,7 +824,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, inst = instance_create(net, group_num, NETLINK_CB(skb).portid, - sk_user_ns(NETLINK_CB(skb).ssk)); + sk_user_ns(NETLINK_CB(skb).sk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f20a8100517..978a61f7c87 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -891,7 +891,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, if (nlk->netlink_rcv != NULL) { ret = skb->len; skb_set_owner_r(skb, sk); - NETLINK_CB(skb).ssk = ssk; + NETLINK_CB(skb).sk = ssk; nlk->netlink_rcv(skb); consume_skb(skb); } else { diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index aa36a8c8b33..7881e2fccbc 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && - sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) + sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } -- cgit v1.2.3-70-g09d2 From 0ebd0ac5ff01ebf412e1bd3c33620ef7ffc5d866 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:46:58 +0000 Subject: net: add function to allocate sk_buff head without data area Add a function to allocate a sk_buff head without any data. This will be used by memory mapped netlink to attach data from the mmaped area to the skb. Additionally change skb_release_all() to check whether the skb has a data area to allow the skb destructor to clear the data pointer in case only a head has been allocated. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ net/core/skbuff.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f5bed7b3195..2e0ced1af3b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -651,6 +651,12 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } +extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node); +static inline struct sk_buff *alloc_skb_head(gfp_t priority) +{ + return __alloc_skb_head(priority, -1); +} + extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a92d9e7d10f..898cf5c566f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -179,6 +179,33 @@ out: * */ +struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) +{ + struct sk_buff *skb; + + /* Get the HEAD */ + skb = kmem_cache_alloc_node(skbuff_head_cache, + gfp_mask & ~__GFP_DMA, node); + if (!skb) + goto out; + + /* + * Only clear those fields we need to clear, not those that we will + * actually initialise below. Hence, don't put any more fields after + * the tail pointer in struct sk_buff! 
+ */ + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = NULL; + skb->truesize = sizeof(struct sk_buff); + atomic_set(&skb->users, 1); + +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif +out: + return skb; +} + /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -584,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - skb_release_data(skb); + if (likely(skb->data)) + skb_release_data(skb); } /** -- cgit v1.2.3-70-g09d2 From ccdfcc398594ddf3f77348c5a10938dbe9efefbe Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:01 +0000 Subject: netlink: mmaped netlink: ring setup Add support for mmap'ed RX and TX ring setup and teardown based on the af_packet.c code. The following patches will use this to add the real mmap'ed receive and transmit functionality. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 32 ++++++ net/Kconfig | 9 ++ net/netlink/af_netlink.c | 268 ++++++++++++++++++++++++++++++++++++++++++- net/netlink/af_netlink.h | 20 ++++ 4 files changed, 327 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 32a354f67ba..1a85940f8ab 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -1,6 +1,7 @@ #ifndef _UAPI__LINUX_NETLINK_H #define _UAPI__LINUX_NETLINK_H +#include #include /* for __kernel_sa_family_t */ #include @@ -105,11 +106,42 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 struct nl_pktinfo { __u32 group; }; +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { diff --git a/net/Kconfig b/net/Kconfig index 2ddc9046868..1a2221630e6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -23,6 +23,15 @@ menuconfig NET if NET +config NETLINK_MMAP + bool "Netlink: mmaped IO" + help + This option enables support for memory mapped netlink IO. This + reduces overhead by avoiding copying data between kernel- and + userspace. + + If unsure, say N. 
+ config WANT_COMPAT_NETLINK_MESSAGES bool help diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 58b9025978f..1d3c7128e90 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,234 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +#ifdef CONFIG_NETLINK_MMAP +static __pure struct page *pgvec_to_page(const void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + else + return virt_to_page(addr); +} + +static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + if (pg_vec[i] != NULL) { + if (is_vmalloc_addr(pg_vec[i])) + vfree(pg_vec[i]); + else + free_pages((unsigned long)pg_vec[i], order); + } + } + kfree(pg_vec); +} + +static void *alloc_one_pg_vec_page(unsigned long order) +{ + void *buffer; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | + __GFP_NOWARN | __GFP_NORETRY; + + buffer = (void *)__get_free_pages(gfp_flags, order); + if (buffer != NULL) + return buffer; + + buffer = vzalloc((1 << order) * PAGE_SIZE); + if (buffer != NULL) + return buffer; + + gfp_flags &= ~__GFP_NORETRY; + return (void *)__get_free_pages(gfp_flags, order); +} + +static void **alloc_pg_vec(struct netlink_sock *nlk, + struct nl_mmap_req *req, unsigned int order) +{ + unsigned int block_nr = req->nm_block_nr; + unsigned int i; + void **pg_vec, *ptr; + + pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); + if (pg_vec == NULL) + return NULL; + + for (i = 0; i < block_nr; i++) { + pg_vec[i] = ptr = alloc_one_pg_vec_page(order); + if (pg_vec[i] == NULL) + goto err1; + } + + return pg_vec; +err1: + free_pg_vec(pg_vec, order, block_nr); + return NULL; +} + +static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, + bool closing, bool tx_ring) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + struct sk_buff_head *queue; + void **pg_vec = NULL; + unsigned int order = 0; + int err; + + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; + + if (!closing) { + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; + } + + if (req->nm_block_nr) { + if (ring->pg_vec != NULL) + return -EBUSY; + + if ((int)req->nm_block_size <= 0) + return -EINVAL; + if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) + return -EINVAL; + if (req->nm_frame_size < NL_MMAP_HDRLEN) + return -EINVAL; + if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) + return -EINVAL; + + ring->frames_per_block = req->nm_block_size / + req->nm_frame_size; + if (ring->frames_per_block == 0) + return -EINVAL; + if (ring->frames_per_block * req->nm_block_nr != + req->nm_frame_nr) + return -EINVAL; + + order = get_order(req->nm_block_size); + pg_vec = alloc_pg_vec(nlk, req, order); + if (pg_vec == NULL) + return -ENOMEM; + } else { + if (req->nm_frame_nr) + return -EINVAL; + } + + err = -EBUSY; + mutex_lock(&nlk->pg_vec_lock); + if (closing || atomic_read(&nlk->mapped) == 0) { + err = 0; + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + } + mutex_unlock(&nlk->pg_vec_lock); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); + return err; +} + +static void netlink_mm_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_inc(&nlk_sk(sk)->mapped); +} + +static void netlink_mm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_dec(&nlk_sk(sk)->mapped); +} + +static const struct vm_operations_struct netlink_mmap_ops = { + .open = netlink_mm_open, + .close = netlink_mm_close, +}; + +static int netlink_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + unsigned long start, size, expected; + unsigned int i; + int err = -EINVAL; + + if (vma->vm_pgoff) + return -EINVAL; + + mutex_lock(&nlk->pg_vec_lock); + + expected = 0; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; + } + + if (expected == 0) + goto out; + + size = vma->vm_end - vma->vm_start; + if (size != expected) + goto out; + + start = vma->vm_start; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + + for (i = 0; i < ring->pg_vec_len; i++) { + struct page *page; + void *kaddr = ring->pg_vec[i]; + unsigned int pg_num; + + for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { + page = pgvec_to_page(kaddr); + err = vm_insert_page(vma, start, page); + if (err < 0) + goto out; + start += PAGE_SIZE; + kaddr += PAGE_SIZE; + } + } + } + + atomic_inc(&nlk->mapped); + vma->vm_ops = &netlink_mmap_ops; + err = 0; +out: + mutex_unlock(&nlk->pg_vec_lock); + return 0; +} +#else /* CONFIG_NETLINK_MMAP */ +#define netlink_mmap sock_no_mmap +#endif /* CONFIG_NETLINK_MMAP */ + static void netlink_destroy_callback(struct netlink_callback *cb) { 
kfree_skb(cb->skb); @@ -146,6 +375,18 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); +#ifdef CONFIG_NETLINK_MMAP + if (1) { + struct nl_mmap_req req; + + memset(&req, 0, sizeof(req)); + if (nlk->rx_ring.pg_vec) + netlink_set_ring(sk, &req, true, false); + memset(&req, 0, sizeof(req)); + if (nlk->tx_ring.pg_vec) + netlink_set_ring(sk, &req, true, true); + } +#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -409,6 +650,9 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); +#ifdef CONFIG_NETLINK_MMAP + mutex_init(&nlk->pg_vec_lock); +#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -1211,7 +1455,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optlen >= sizeof(int) && + if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && + optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -1260,6 +1505,25 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; +#ifdef CONFIG_NETLINK_MMAP + case NETLINK_RX_RING: + case NETLINK_TX_RING: { + struct nl_mmap_req req; + + /* Rings might consume more memory than queue limits, require + * CAP_NET_ADMIN. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optlen < sizeof(req)) + return -EINVAL; + if (copy_from_user(&req, optval, sizeof(req))) + return -EFAULT; + err = netlink_set_ring(sk, &req, false, + optname == NETLINK_TX_RING); + break; + } +#endif /* CONFIG_NETLINK_MMAP */ default: err = -ENOPROTOOPT; } @@ -2093,7 +2357,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = sock_no_mmap, + .mmap = netlink_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index d9acb2a1d85..ed8522265f4 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -6,6 +6,20 @@ #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) +struct netlink_ring { + void **pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + atomic_t pending; +}; + struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; @@ -24,6 +38,12 @@ struct netlink_sock { void (*netlink_rcv)(struct sk_buff *skb); void (*netlink_bind)(int group); struct module *module; +#ifdef CONFIG_NETLINK_MMAP + struct mutex pg_vec_lock; + struct netlink_ring rx_ring; + struct netlink_ring tx_ring; + atomic_t mapped; +#endif /* CONFIG_NETLINK_MMAP */ }; static inline struct netlink_sock *nlk_sk(struct sock *sk) -- cgit v1.2.3-70-g09d2 From 9652e931e73be7e54a9c40e9bcd4bbdafe92a406 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:02 +0000 Subject: netlink: add mmap'ed netlink helper functions Add helper functions for looking up mmap'ed frame headers, reading and writing their status, allocating skbs with mmap'ed data areas and a poll function. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 7 ++ net/netlink/af_netlink.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 190 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d8e9264ae04..07c473848db 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -15,10 +15,17 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) return (struct nlmsghdr *)skb->data; } +enum netlink_skb_flags { + NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ + NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ + NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ +}; + struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ __u32 portid; __u32 dst_group; + __u32 flags; struct sock *sk; }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1d3c7128e90..6560635fd25 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -89,6 +90,7 @@ EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); +static void netlink_skb_destructor(struct sk_buff *skb); DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -109,6 +111,11 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u } #ifdef CONFIG_NETLINK_MMAP +static bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +} + static __pure struct page *pgvec_to_page(const void *addr) { if (is_vmalloc_addr(addr)) @@ -332,8 +339,154 @@ out: mutex_unlock(&nlk->pg_vec_lock); return 0; } + +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +{ +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + struct page *p_start, *p_end; + + /* First page is flushed through netlink_{get,set}_status */ + p_start = pgvec_to_page(hdr + PAGE_SIZE); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_MSG_HDRLEN + hdr->nm_len - 1); + while (p_start <= p_end) { + flush_dcache_page(p_start); + p_start++; + } +#endif +} + +static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) +{ + smp_rmb(); + flush_dcache_page(pgvec_to_page(hdr)); + return hdr->nm_status; +} + +static void netlink_set_status(struct nl_mmap_hdr *hdr, + enum nl_mmap_status status) +{ + hdr->nm_status = status; + flush_dcache_page(pgvec_to_page(hdr)); + smp_wmb(); +} + +static struct nl_mmap_hdr * +__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) +{ + unsigned int pg_vec_pos, frame_off; + + pg_vec_pos = pos / ring->frames_per_block; + frame_off = pos % ring->frames_per_block; + + return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); +} + +static struct nl_mmap_hdr * +netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, + enum nl_mmap_status status) +{ + struct nl_mmap_hdr *hdr; + + hdr = __netlink_lookup_frame(ring, pos); + if (netlink_get_status(hdr) != status) + return NULL; + + return hdr; +} + +static struct nl_mmap_hdr * +netlink_current_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + return netlink_lookup_frame(ring, ring->head, status); +} + +static struct nl_mmap_hdr * +netlink_previous_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + unsigned int prev; + + prev = ring->head ? 
ring->head - 1 : ring->frame_max; + return netlink_lookup_frame(ring, prev, status); +} + +static void netlink_increment_head(struct netlink_ring *ring) +{ + ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; +} + +static void netlink_forward_ring(struct netlink_ring *ring) +{ + unsigned int head = ring->head, pos = head; + const struct nl_mmap_hdr *hdr; + + do { + hdr = __netlink_lookup_frame(ring, pos); + if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) + break; + if (hdr->nm_status != NL_MMAP_STATUS_SKIP) + break; + netlink_increment_head(ring); + } while (ring->head != head); +} + +static unsigned int netlink_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + unsigned int mask; + + mask = datagram_poll(file, sock, wait); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (nlk->rx_ring.pg_vec) { + netlink_forward_ring(&nlk->rx_ring); + if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLIN | POLLRDNORM; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + spin_lock_bh(&sk->sk_write_queue.lock); + if (nlk->tx_ring.pg_vec) { + if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLOUT | POLLWRNORM; + } + spin_unlock_bh(&sk->sk_write_queue.lock); + + return mask; +} + +static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) +{ + return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); +} + +static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, + struct netlink_ring *ring, + struct nl_mmap_hdr *hdr) +{ + unsigned int size; + void *data; + + size = ring->frame_size - NL_MMAP_HDRLEN; + data = (void *)hdr + NL_MMAP_HDRLEN; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + + skb->destructor = netlink_skb_destructor; + NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; + NETLINK_CB(skb).sk = sk; +} #else /* CONFIG_NETLINK_MMAP */ +#define netlink_skb_is_mmaped(skb) false #define netlink_mmap sock_no_mmap +#define netlink_poll datagram_poll #endif /* CONFIG_NETLINK_MMAP */ static void netlink_destroy_callback(struct netlink_callback *cb) @@ -350,7 +503,35 @@ static void netlink_consume_callback(struct netlink_callback *cb) static void netlink_skb_destructor(struct sk_buff *skb) { - sock_rfree(skb); +#ifdef CONFIG_NETLINK_MMAP + struct nl_mmap_hdr *hdr; + struct netlink_ring *ring; + struct sock *sk; + + /* If a packet from the kernel to userspace was freed because of an + * error without being delivered to userspace, the kernel must reset + * the status. In the direction userspace to kernel, the status is + * always reset here after the packet was processed and freed. 
+ */ + if (netlink_skb_is_mmaped(skb)) { + hdr = netlink_mmap_hdr(skb); + sk = NETLINK_CB(skb).sk; + + if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { + hdr->nm_len = 0; + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + } + ring = &nlk_sk(sk)->rx_ring; + + WARN_ON(atomic_read(&ring->pending) == 0); + atomic_dec(&ring->pending); + sock_put(sk); + + skb->data = NULL; + } +#endif + if (skb->sk != NULL) + sock_rfree(skb); } static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) @@ -2349,7 +2530,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = datagram_poll, + .poll = netlink_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, -- cgit v1.2.3-70-g09d2 From f9c2288837ba072b21dba955f04a4c97eaa77b1e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:04 +0000 Subject: netlink: implement memory mapped recvmsg() Add support for mmap'ed recvmsg(). To allow the kernel to construct messages into the mapped area, a dataless skb is allocated and the data pointer is set to point into the ring frame. This means frames will be delivered to userspace in order of allocation instead of order of transmission. This usually doesn't matter since the order is either not determinable by userspace or message creation/transmission is serialized. The only case where this can have a visible difference is nfnetlink_queue. Userspace can't assume mmap'ed messages have ordered IDs anymore and needs to check this if using batched verdicts. For non-mapped sockets, nothing changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 + net/netlink/af_netlink.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 07c473848db..6358da5eeee 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -64,6 +64,8 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); +extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 90504a0e42a..d120b5d4d86 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -116,6 +116,11 @@ static bool netlink_skb_is_mmaped(const struct sk_buff *skb) return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; } +static bool netlink_rx_is_mmaped(struct sock *sk) +{ + return nlk_sk(sk)->rx_ring.pg_vec != NULL; +} + static bool netlink_tx_is_mmaped(struct sock *sk) { return nlk_sk(sk)->tx_ring.pg_vec != NULL; @@ -589,8 +594,54 @@ out: mutex_unlock(&nlk->pg_vec_lock); return err; } + +static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) +{ + struct nl_mmap_hdr *hdr; + + hdr = netlink_mmap_hdr(skb); + hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + 
hdr->nm_uid = NETLINK_CB(skb).creds.uid; + hdr->nm_gid = NETLINK_CB(skb).creds.gid; + netlink_frame_flush_dcache(hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + + NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; + kfree_skb(skb); +} + +static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring = &nlk->rx_ring; + struct nl_mmap_hdr *hdr; + + spin_lock_bh(&sk->sk_receive_queue.lock); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + kfree_skb(skb); + sk->sk_err = ENOBUFS; + sk->sk_error_report(sk); + return; + } + netlink_increment_head(ring); + __skb_queue_tail(&sk->sk_receive_queue, skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); + + hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + hdr->nm_uid = NETLINK_CB(skb).creds.uid; + hdr->nm_gid = NETLINK_CB(skb).creds.gid; + netlink_set_status(hdr, NL_MMAP_STATUS_COPY); +} + #else /* CONFIG_NETLINK_MMAP */ #define netlink_skb_is_mmaped(skb) false +#define netlink_rx_is_mmaped(sk) false #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll @@ -1381,7 +1432,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); +#ifdef CONFIG_NETLINK_MMAP + if (netlink_skb_is_mmaped(skb)) + netlink_queue_mmaped_skb(sk, skb); + else if (netlink_rx_is_mmaped(sk)) + netlink_ring_set_copied(sk, skb); + else +#endif /* CONFIG_NETLINK_MMAP */ + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); return len; } @@ -1492,6 +1550,68 @@ retry: } EXPORT_SYMBOL(netlink_unicast); +struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ +#ifdef CONFIG_NETLINK_MMAP + struct sock *sk = NULL; + struct sk_buff *skb; + struct netlink_ring *ring; + struct nl_mmap_hdr *hdr; + unsigned int maxlen; + + sk = netlink_getsockbyportid(ssk, dst_portid); + if (IS_ERR(sk)) + goto out; + + ring = &nlk_sk(sk)->rx_ring; + /* fast-path without atomic ops for common case: non-mmaped receiver */ + if (ring->pg_vec == NULL) + goto out_put; + + skb = alloc_skb_head(gfp_mask); + if (skb == NULL) + goto err1; + + spin_lock_bh(&sk->sk_receive_queue.lock); + /* check again under lock */ + if (ring->pg_vec == NULL) + goto out_free; + + maxlen = ring->frame_size - NL_MMAP_HDRLEN; + if (maxlen < size) + goto out_free; + + netlink_forward_ring(ring); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) + goto err2; + netlink_ring_setup_skb(skb, sk, ring, hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); + atomic_inc(&ring->pending); + netlink_increment_head(ring); + + spin_unlock_bh(&sk->sk_receive_queue.lock); + return skb; + +err2: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); +err1: + sock_put(sk); + return NULL; + +out_free: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); +out_put: + sock_put(sk); +out: +#endif + return alloc_skb(size, gfp_mask); +} +EXPORT_SYMBOL_GPL(netlink_alloc_skb); + int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; @@ -2270,9 +2390,13 @@ static int netlink_dump(struct sock *sk) alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); + if (!netlink_rx_is_mmaped(sk) && + 
atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto errout_skb; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); if (!skb) goto errout_skb; + netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -2327,6 +2451,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; + /* Memory mapped dump requests need to be copied to avoid looping + * on the pending state in netlink_mmap_sendmsg() while the CB hold + * a reference to the skb. + */ + if (netlink_skb_is_mmaped(skb)) { + skb = skb_copy(skb, GFP_KERNEL); + if (skb == NULL) { + kfree(cb); + return -ENOBUFS; + } + } else + atomic_inc(&skb->users); + cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2387,7 +2524,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (err) payload += nlmsg_len(nlh); - skb = nlmsg_new(payload, GFP_KERNEL); + skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), + NETLINK_CB(in_skb).portid, GFP_KERNEL); if (!skb) { struct sock *sk; -- cgit v1.2.3-70-g09d2 From 4ae9fbee1690848a6aace1e0193ab27e981e35a5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:06 +0000 Subject: netlink: add RX/TX-ring support to netlink diag Based on AF_PACKET. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/uapi/linux/netlink_diag.h | 10 ++++++++++ net/netlink/diag.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index 88009a31cd0..4e31db4eea4 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -25,9 +25,18 @@ struct netlink_diag_msg { __u32 ndiag_cookie[2]; }; +struct netlink_diag_ring { + __u32 ndr_block_size; + __u32 ndr_block_nr; + __u32 ndr_frame_size; + __u32 ndr_frame_nr; +}; + enum { NETLINK_DIAG_MEMINFO, NETLINK_DIAG_GROUPS, + NETLINK_DIAG_RX_RING, + NETLINK_DIAG_TX_RING, __NETLINK_DIAG_MAX, }; @@ -38,5 +47,6 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 5ffb1d1cf40..4e4aa471cd0 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -7,6 +7,34 @@ #include "af_netlink.h" +static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, + struct sk_buff *nlskb) +{ + struct netlink_diag_ring ndr; + + ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + ndr.ndr_block_nr = ring->pg_vec_len; + ndr.ndr_frame_size = ring->frame_size; + ndr.ndr_frame_nr = ring->frame_max + 1; + + return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); +} + +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + int ret; + + mutex_lock(&nlk->pg_vec_lock); + ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); + if (!ret) + ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, + nlskb); + mutex_unlock(&nlk->pg_vec_lock); + + return ret; +} + static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -51,6 +79,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && + 
sk_diag_put_rings_cfg(sk, skb)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.3-70-g09d2 From ec464e5dc504a164c5dbff4a06812d495e44e34d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:08 +0000 Subject: netfilter: rename netlink related "pid" variables to "portid" Get rid of the confusing mix of pid and portid and use portid consistently for all netlink related socket identities. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 9 +++++---- include/net/netfilter/nf_conntrack.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 4 ++-- net/netfilter/nf_conntrack_core.c | 8 ++++---- net/netfilter/nf_conntrack_expect.c | 8 ++++---- net/netfilter/nfnetlink.c | 13 +++++++------ 6 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ecbb8e49591..60b164171da 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -29,10 +29,11 @@ extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, - int echo, gfp_t flags); -extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, + unsigned int group, int echo, gfp_t flags); +extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, + u32 portid, int flags); extern void nfnl_lock(__u8 subsys_id); extern void nfnl_unlock(__u8 subsys_id); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index caca0c4d6b4..644d9c223d2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -184,7 +184,7 @@ extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_dying_timeout(struct nf_conn *ct); -extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); +extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cbbae7621e2..3f3aecbc863 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -88,7 +88,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report); + u32 portid, int report); static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { nf_ct_unlink_expect_report(exp, 0, 0); @@ -106,7 +106,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report); + u32 portid, int report); 
static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect) { return nf_ct_expect_related_report(expect, 0, 0); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 007e8c43d19..54ddc2f8e7c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1260,7 +1260,7 @@ void nf_ct_iterate_cleanup(struct net *net, EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); struct __nf_ct_flush_report { - u32 pid; + u32 portid; int report; }; @@ -1275,7 +1275,7 @@ static int kill_report(struct nf_conn *i, void *data) /* If we fail to deliver the event, death_by_timeout() will retry */ if (nf_conntrack_event_report(IPCT_DESTROY, i, - fr->pid, fr->report) < 0) + fr->portid, fr->report) < 0) return 1; /* Avoid the delivery of the destroy event in death_by_timeout(). */ @@ -1298,10 +1298,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 portid, int report) { struct __nf_ct_flush_report fr = { - .pid = pid, + .portid = portid, .report = report, }; nf_ct_iterate_cleanup(net, kill_report, &fr); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8c10e3db3d9..0adfdcc68ba 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -40,7 +40,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report) + u32 portid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); @@ -54,7 +54,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del(&exp->lnode); master_help->expecting[exp->class]--; - nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); + nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); @@ -412,7 +412,7 @@ out: } int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) + u32 portid, int report) { int ret; @@ -425,7 +425,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret < 0) goto out; spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; out: spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index bc4c499adb1..1640bd7dcdf 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -112,22 +112,23 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { - return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - return netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int 
nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags) { - return netlink_unicast(net->nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, portid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); -- cgit v1.2.3-70-g09d2 From 3ab1f683bf8be7aa7869cc3ffb8d1db2ec8c8307 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Apr 2013 06:47:09 +0000 Subject: nfnetlink: add support for memory mapped netlink Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 2 ++ net/netfilter/nfnetlink.c | 7 +++++++ net/netfilter/nfnetlink_log.c | 10 ++++++---- net/netfilter/nfnetlink_queue_core.c | 3 ++- 4 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 60b164171da..cadb7402d7a 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -29,6 +29,8 @@ extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); +extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask); extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1640bd7dcdf..572d87dc116 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -112,6 +112,13 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ + return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); +} +EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); + int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 50aaf716cd6..d4199eb9b33 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -318,7 +318,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -327,13 +327,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) * message. 
WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = alloc_skb(pkt_size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, pkt_size, + peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", pkt_size); @@ -696,7 +697,8 @@ nfulnl_log_packet(u_int8_t pf, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); + inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, + size); if (!inst->skb) goto alloc_failure; } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 5e280b3e154..ef3cdb4bfee 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -339,7 +339,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = alloc_skb(size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + GFP_ATOMIC); if (!skb) return NULL; -- cgit v1.2.3-70-g09d2 From 6e94d1ef37e439bf45659cc071553574ccb98080 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Apr 2013 01:29:10 +0000 Subject: net: socket: move ktime2ts to ktime header api Currently, ktime2ts is a small helper function that is only used in net/socket.c. Move this helper into the ktime API as a small inline function, so that i) it's maintained together with ktime routines, and ii) also other files can make use of it. The function is named ktime_to_timespec_cond() and placed into the generic part of ktime, since we internally make use of ktime_to_timespec(). ktime_to_timespec() itself does not check the ktime variable for zero, hence, we name this function ktime_to_timespec_cond() for only a conditional conversion, and adapt its users to it. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/ktime.h | 18 ++++++++++++++++++ net/socket.c | 20 ++++---------------- 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index e83512f63df..bbca12804d1 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -330,6 +330,24 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); +/** + * ktime_to_timespec_cond - convert a ktime_t variable to timespec + * format only if the variable contains data + * @kt: the ktime_t variable to convert + * @ts: the timespec variable to store the result in + * + * Returns true if there was a successful conversion, false if kt was 0. + */ +static inline bool ktime_to_timespec_cond(const ktime_t kt, struct timespec *ts) +{ + if (kt.tv64) { + *ts = ktime_to_timespec(kt); + return true; + } else { + return false; + } +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an diff --git a/net/socket.c b/net/socket.c index 36883fea44f..280283f03cc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -681,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); -static int ktime2ts(ktime_t kt, struct timespec *ts) -{ - if (kt.tv64) { - *ts = ktime_to_timespec(kt); - return 1; - } else { - return 0; - } -} - /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -723,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(ts, 0, sizeof(ts)); - if (skb->tstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { - skb_get_timestampns(skb, ts + 0); + if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - } if (shhwtstamps) { if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime2ts(shhwtstamps->syststamp, ts + 1)) + ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) empty = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) empty = 0; } if (!empty) -- cgit v1.2.3-70-g09d2 From 9fae27b33785cb6350f21449073d9f7d1a2bbfd6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 20 Apr 2013 23:51:41 +0000 Subject: net: vlan: fix dummy function signatures for CONFIG_VLAN=n Fix up some function signatures for CONFIG_VLAN=n that were missed during the 802.1ad support patches. Found by the kbuild robot. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a78f9390da8..52bd03b3896 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -104,7 +104,8 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else static inline struct net_device * -__vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id) +__vlan_find_dev_deep(struct net_device *real_dev, + __be16 vlan_proto, u16 vlan_id) { return NULL; } @@ -131,12 +132,12 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb) return skb; } -static inline int vlan_vid_add(struct net_device *dev, unsigned short vid) +static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; } -static inline void vlan_vid_del(struct net_device *dev, unsigned short vid) +static inline void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { } -- cgit v1.2.3-70-g09d2 From 908f8d07e9774c2476e0683f6a0ce50562a2da45 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 7 Apr 2013 09:53:30 +0300 Subject: mac80211: indicate admission control in TX queue parameters Some driver implementations need to know whether mandatory admission control is required by the AP for some ACs. Add a parameter to the TX queue parameters indicating this. As there's currently no support for admission control in mac80211's AP implementation, it's only ever set for the client implementation. 
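As a purely illustrative sketch (not part of this patch), a driver that wants to honour the new flag could pick it up in its conf_tx handler roughly as follows; struct drv_priv and the drv_*() helpers are hypothetical:

	static int drv_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
			       u16 ac, const struct ieee80211_tx_queue_params *params)
	{
		struct drv_priv *priv = hw->priv;	/* hypothetical private data */

		/* program the EDCA parameters as before */
		drv_write_edca(priv, ac, params->aifs, params->cw_min,
			       params->cw_max, params->txop);

		/*
		 * New: tell the firmware that the AP mandates admission control
		 * for this access category, so it can keep traffic off that AC
		 * until admission is granted.
		 */
		drv_set_acm(priv, ac, params->acm);

		return 0;
	}
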
Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4f693a5c54d..b77d57a070e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -128,6 +128,7 @@ enum ieee80211_ac_numbers { * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled + * @acm: is mandatory admission control required for the access category * @uapsd: is U-APSD mode enabled for the queue */ struct ieee80211_tx_queue_params { @@ -135,6 +136,7 @@ struct ieee80211_tx_queue_params { u16 cw_min; u16 cw_max; u8 aifs; + bool acm; bool uapsd; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f7beb12abde..13bb81402e1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1661,6 +1661,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.acm = acm; params.uapsd = uapsd; mlme_dbg(sdata, -- cgit v1.2.3-70-g09d2 From 5de17984898c5758fc6ebe08eccea9f4b6548914 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 18 Apr 2013 15:49:00 +0200 Subject: cfg80211: introduce critical protocol indication from user-space Some protocols need a more reliable connection to complete successful in reasonable time. This patch adds a user-space API to indicate the wireless driver that a critical protocol is about to commence and when it is done, using nl80211 primitives NL80211_CMD_CRIT_PROTOCOL_START and NL80211_CRIT_PROTOCOL_STOP. There can be only on critical protocol session started per registered cfg80211 device. The driver can support this by implementing the cfg80211 callbacks .crit_proto_start() and .crit_proto_stop(). Examples of protocols that can benefit from this are DHCP, EAPOL, APIPA. Exactly how the link can/should be made more reliable is up to the driver. Things to consider are avoid scanning, no multi-channel operations, and alter coexistence schemes. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky (Zhenhui) Lin Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 23 +++++++++ include/uapi/linux/nl80211.h | 39 +++++++++++++++ net/wireless/core.h | 3 ++ net/wireless/mlme.c | 5 ++ net/wireless/nl80211.c | 117 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 24 ++++++++- net/wireless/trace.h | 35 +++++++++++++ 7 files changed, 245 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dff96d8cafc..26b5b692c22 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2002,6 +2002,12 @@ struct cfg80211_update_ft_ies_params { * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. + * + * @crit_proto_start: Indicates a critical protocol needs more link reliability + * for a given duration (milliseconds). The protocol is provided so the + * driver can take the most appropriate actions. + * @crit_proto_stop: Indicates critical protocol no longer needs increased link + * reliability. This operation can not fail. 
*/ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2231,6 +2237,12 @@ struct cfg80211_ops { struct cfg80211_chan_def *chandef); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); + int (*crit_proto_start)(struct wiphy *wiphy, + struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, + u16 duration); + void (*crit_proto_stop)(struct wiphy *wiphy, + struct wireless_dev *wdev); }; /* @@ -4137,6 +4149,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp); +/** + * cfg80211_crit_proto_stopped() - indicate critical protocol stopped by driver. + * + * @wdev: the wireless device for which critical protocol is stopped. + * + * This function can be called by the driver to indicate it has reverted + * operation back to normal. One reason could be that the duration given + * by .crit_proto_start() has expired. + */ +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 79da8710448..d1e48b5e348 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -639,6 +639,13 @@ * with the relevant Information Elements. This event is used to report * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). * + * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running + * a critical protocol that needs more reliability in the connection to + * complete. + * + * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can + * return back to normal. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -798,6 +805,9 @@ enum nl80211_commands { NL80211_CMD_UPDATE_FT_IES, NL80211_CMD_FT_EVENT, + NL80211_CMD_CRIT_PROTOCOL_START, + NL80211_CMD_CRIT_PROTOCOL_STOP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1414,6 +1424,11 @@ enum nl80211_commands { * @NL80211_ATTR_IE_RIC: Resource Information Container Information * Element * + * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased + * reliability, see &enum nl80211_crit_proto_id (u16). + * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which + * the connection should have increased reliability (u16). + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1709,6 +1724,9 @@ enum nl80211_attrs { NL80211_ATTR_MDID, NL80211_ATTR_IE_RIC, + NL80211_ATTR_CRIT_PROT_ID, + NL80211_ATTR_MAX_CRIT_PROT_DURATION, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3682,4 +3700,25 @@ enum nl80211_protocol_features { NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0, }; +/** + * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers + * + * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified. + * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol. + * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol. + * @NL80211_CRIT_PROTO_APIPA: APIPA protocol. + * @NUM_NL80211_CRIT_PROTO: must be kept last. 
+ */ +enum nl80211_crit_proto_id { + NL80211_CRIT_PROTO_UNSPEC, + NL80211_CRIT_PROTO_DHCP, + NL80211_CRIT_PROTO_EAPOL, + NL80211_CRIT_PROTO_APIPA, + /* add other protocols before this one */ + NUM_NL80211_CRIT_PROTO +}; + +/* maximum duration for critical protocol measures */ +#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */ + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 124e5e773fb..fd35dae547c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -88,6 +88,9 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; + /* netlink port which started critical protocol (0 means not started) */ + u32 crit_proto_nlportid; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 390198bf4b3..0c7b7dd855f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -648,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) spin_unlock_bh(&wdev->mgmt_registrations_lock); + if (nlportid && rdev->crit_proto_nlportid == nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } + if (nlportid == wdev->ap_unexpected_nlportid) wdev->ap_unexpected_nlportid = 0; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3abcbbada6d..afa283841e8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1424,6 +1424,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); + if (split) { + CMD(crit_proto_start, CRIT_PROTOCOL_START); + CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); + } #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -8216,6 +8220,64 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) return rdev_update_ft_ies(rdev, dev, &ft_params); } +static int nl80211_crit_protocol_start(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC; + u16 duration; + int ret; + + if (!rdev->ops->crit_proto_start) + return -EOPNOTSUPP; + + if (WARN_ON(!rdev->ops->crit_proto_stop)) + return -EINVAL; + + if (rdev->crit_proto_nlportid) + return -EBUSY; + + /* determine protocol if provided */ + if (info->attrs[NL80211_ATTR_CRIT_PROT_ID]) + proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]); + + if (proto >= NUM_NL80211_CRIT_PROTO) + return -EINVAL; + + /* timeout must be provided */ + if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]) + return -EINVAL; + + duration = + nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]); + + if (duration > NL80211_CRIT_PROTO_MAX_DURATION) + return -ERANGE; + + ret = rdev_crit_proto_start(rdev, wdev, proto, duration); + if (!ret) + rdev->crit_proto_nlportid = info->snd_portid; + + return ret; +} + +static int nl80211_crit_protocol_stop(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->crit_proto_stop) + return -EOPNOTSUPP; + + if (rdev->crit_proto_nlportid) { + rdev->crit_proto_nlportid = 0; + rdev_crit_proto_stop(rdev, wdev); + } + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define 
NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8905,6 +8967,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_START, + .doit = nl80211_crit_protocol_start, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, + .doit = nl80211_crit_protocol_stop, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + } }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -10650,6 +10728,45 @@ void cfg80211_ft_event(struct net_device *netdev, } EXPORT_SYMBOL(cfg80211_ft_event); +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev; + struct sk_buff *msg; + void *hdr; + u32 nlportid; + + rdev = wiphy_to_dev(wdev->wiphy); + if (!rdev->crit_proto_nlportid) + return; + + nlportid = rdev->crit_proto_nlportid; + rdev->crit_proto_nlportid = 0; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); + return; + + nla_put_failure: + if (hdr) + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} +EXPORT_SYMBOL(cfg80211_crit_proto_stopped); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index d77e1c1d3a0..9f15f0ac824 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_stop_p2p_device(&rdev->wiphy, wdev); rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); -} +} static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -901,4 +901,26 @@ static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, + u16 duration) +{ + int ret; + + trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration); + ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev, + protocol, duration); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_crit_proto_stop(&rdev->wiphy, wdev); + rdev->ops->crit_proto_stop(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 3c2033b8f59..ecd4fcec3c9 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1806,6 +1806,41 @@ TRACE_EVENT(rdev_update_ft_ies, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) ); +TRACE_EVENT(rdev_crit_proto_start, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_crit_proto_id protocol, u16 duration), + TP_ARGS(wiphy, wdev, protocol, duration), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u16, 
proto) + __field(u16, duration) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->proto = protocol; + __entry->duration = duration; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration) +); + +TRACE_EVENT(rdev_crit_proto_stop, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3-70-g09d2 From 0d528d85c519b755b6f4e1bafa3a39984370e1c1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:14:41 +0200 Subject: mac80211: improve the rate control API Allow rate control modules to pass a rate selection table to mac80211 and the driver. This allows drivers to fetch the most recent rate selection from the sta pointer for already buffered frames. This allows rate control to respond faster to sudden link changes and it is also a step towards adding minstrel_ht support to drivers like iwlwifi. When a driver sets IEEE80211_HW_SUPPORTS_RC_TABLE, mac80211 will not fill info->control.rates with rates from the rate table (to preserve explicit overrides by the rate control module). The driver then explicitly calls ieee80211_get_tx_rates to merge overrides from info->control.rates with defaults from the sta rate table. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 66 +++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/rate.c | 327 +++++++++++++++++++++++++++++++++++++-------- net/mac80211/tx.c | 142 +++++--------------- 4 files changed, 369 insertions(+), 167 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b77d57a070e..04c2d4670dc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -563,6 +563,9 @@ enum mac80211_rate_control_flags { /* maximum number of rate stages */ #define IEEE80211_TX_MAX_RATES 4 +/* maximum number of rate table entries */ +#define IEEE80211_TX_RATE_TABLE_SIZE 4 + /** * struct ieee80211_tx_rate - rate selection/status * @@ -659,6 +662,8 @@ struct ieee80211_tx_info { s8 rts_cts_rate_idx; u8 use_rts:1; u8 use_cts_prot:1; + u8 short_preamble:1; + u8 skip_table:1; /* 2 bytes free */ }; /* only needed before rate control */ @@ -680,6 +685,8 @@ struct ieee80211_tx_info { struct { struct ieee80211_tx_rate driver_rates[ IEEE80211_TX_MAX_RATES]; + u8 pad[4]; + void *rate_driver_data[ IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE / sizeof(void *)]; }; @@ -1224,6 +1231,24 @@ enum ieee80211_sta_rx_bandwidth { IEEE80211_STA_RX_BW_160, }; +/** + * struct ieee80211_sta_rates - station rate selection table + * + * @rcu_head: RCU head used for freeing the table on update + * @rates: transmit rates/flags to be used by default. + * Overriding entries per-packet is possible by using cb tx control. + */ +struct ieee80211_sta_rates { + struct rcu_head rcu_head; + struct { + s8 idx; + u8 count; + u8 count_cts; + u8 count_rts; + u16 flags; + } rate[IEEE80211_TX_RATE_TABLE_SIZE]; +}; + /** * struct ieee80211_sta - station table entry * @@ -1251,6 +1276,7 @@ enum ieee80211_sta_rx_bandwidth { * notifications and capabilities. The value is only valid after * the station moves to associated state. 
* @smps_mode: current SMPS mode (off, static or dynamic) + * @tx_rates: rate control selection table */ struct ieee80211_sta { u32 supp_rates[IEEE80211_NUM_BANDS]; @@ -1264,6 +1290,7 @@ struct ieee80211_sta { u8 rx_nss; enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; + struct ieee80211_sta_rates __rcu *rates; /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); @@ -1419,6 +1446,9 @@ struct ieee80211_tx_control { * for different virtual interfaces. See the doc section on HW queue * control for more details. * + * @IEEE80211_HW_SUPPORTS_RC_TABLE: The driver supports using a rate + * selection table provided by the rate control algorithm. + * * @IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF: Use the P2P Device address for any * P2P Interface. This will be honoured even if more than one interface * is supported. @@ -1451,6 +1481,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, IEEE80211_HW_AP_LINK_PS = 1<<22, IEEE80211_HW_TX_AMPDU_SETUP_IN_HW = 1<<23, + IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, }; @@ -3136,6 +3167,25 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta, void ieee80211_sta_set_buffered(struct ieee80211_sta *sta, u8 tid, bool buffered); +/** + * ieee80211_get_tx_rates - get the selected transmit rates for a packet + * + * Call this function in a driver with per-packet rate selection support + * to combine the rate info in the packet tx info with the most recent + * rate selection table for the station entry. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @sta: the receiver station to which this packet is sent. + * @skb: the frame to be transmitted. + * @dest: buffer for extracted rate/retry information + * @max_rates: maximum number of rates to fetch + */ +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff *skb, + struct ieee80211_tx_rate *dest, + int max_rates); + /** * ieee80211_tx_status - transmit status callback * @@ -4212,6 +4262,22 @@ bool rate_usable_index_exists(struct ieee80211_supported_band *sband, return false; } +/** + * rate_control_set_rates - pass the sta rate selection to mac80211/driver + * + * When not doing a rate control probe to test rates, rate control should pass + * its rate selection to mac80211. If the driver supports receiving a station + * rate table, it will use it to ensure that frames are always sent based on + * the most recent rate control module decision. + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @pubsta: &struct ieee80211_sta pointer to the target destination. + * @rates: new tx rate set to be used for this station. 
+ */ +int rate_control_set_rates(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_sta_rates *rates); + int ieee80211_rate_control_register(struct rate_control_ops *ops); void ieee80211_rate_control_unregister(struct rate_control_ops *ops); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index af8410e1291..158e6eb188d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -156,6 +156,7 @@ struct ieee80211_tx_data { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_key *key; + struct ieee80211_tx_rate rate; unsigned int flags; }; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 5d545dd2d05..0d51877efdb 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, return 0; } +static void __rate_control_send_low(struct ieee80211_hw *hw, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct ieee80211_tx_info *info) +{ + if ((sband->band != IEEE80211_BAND_2GHZ) || + !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) + info->control.rates[0].idx = rate_lowest_index(sband, sta); + else + info->control.rates[0].idx = + rate_lowest_non_cck_index(sband, sta); + + info->control.rates[0].count = + (info->flags & IEEE80211_TX_CTL_NO_ACK) ? + 1 : hw->max_rate_tries; + + info->control.skip_table = 1; +} + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, @@ -262,16 +281,8 @@ bool rate_control_send_low(struct ieee80211_sta *sta, int mcast_rate; if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { - if ((sband->band != IEEE80211_BAND_2GHZ) || - !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) - info->control.rates[0].idx = - rate_lowest_index(txrc->sband, sta); - else - info->control.rates[0].idx = - rate_lowest_non_cck_index(txrc->sband, sta); - info->control.rates[0].count = - (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 
- 1 : txrc->hw->max_rate_tries; + __rate_control_send_low(txrc->hw, sband, sta, info); + if (!sta && txrc->bss) { mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; if (mcast_rate > 0) { @@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate, static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, - struct ieee80211_tx_rate_control *txrc, + struct ieee80211_supported_band *sband, + enum nl80211_chan_width chan_width, u32 mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { @@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, IEEE80211_TX_RC_USE_SHORT_PREAMBLE); alt_rate.count = rate->count; if (rate_idx_match_legacy_mask(&alt_rate, - txrc->sband->n_bitrates, - mask)) { + sband->n_bitrates, mask)) { *rate = alt_rate; return; } } else { - struct sk_buff *skb = txrc->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - __le16 fc; - /* handle legacy rates */ - if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates, - mask)) + if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask)) return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) - return; - - fc = hdr->frame_control; - if (!ieee80211_is_data(fc)) + if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) return; alt_rate.idx = 0; @@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, alt_rate.flags |= IEEE80211_TX_RC_MCS; - if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40) + if (chan_width == NL80211_CHAN_WIDTH_40) alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) { @@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, */ } +static void rate_fixup_ratelist(struct ieee80211_vif *vif, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_rate *rate; + bool inval = false; + int i; + + /* + * Set up the RTS/CTS rate as the fastest basic rate + * that is not faster than the data rate unless there + * is no basic rate slower than the data rate, in which + * case we pick the slowest basic rate + * + * XXX: Should this check all retry rates? + */ + if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { + u32 basic_rates = vif->bss_conf.basic_rates; + s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; + + rate = &sband->bitrates[rates[0].idx]; + + for (i = 0; i < sband->n_bitrates; i++) { + /* must be a basic rate */ + if (!(basic_rates & BIT(i))) + continue; + /* must not be faster than the data rate */ + if (sband->bitrates[i].bitrate > rate->bitrate) + continue; + /* maximum */ + if (sband->bitrates[baserate].bitrate < + sband->bitrates[i].bitrate) + baserate = i; + } + + info->control.rts_cts_rate_idx = baserate; + } + + for (i = 0; i < max_rates; i++) { + /* + * make sure there's no valid rate following + * an invalid one, just in case drivers don't + * take the API seriously to stop at -1. + */ + if (inval) { + rates[i].idx = -1; + continue; + } + if (rates[i].idx < 0) { + inval = true; + continue; + } + + /* + * For now assume MCS is already set up correctly, this + * needs to be fixed. 
+ */ + if (rates[i].flags & IEEE80211_TX_RC_MCS) { + WARN_ON(rates[i].idx > 76); + + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot) + rates[i].flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; + continue; + } + + if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { + WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); + continue; + } + + /* set up RTS protection if desired */ + if (info->control.use_rts) { + rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; + info->control.use_cts_prot = false; + } + + /* RC is busted */ + if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { + rates[i].idx = -1; + continue; + } + + rate = &sband->bitrates[rates[i].idx]; + + /* set up short preamble */ + if (info->control.short_preamble && + rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + + /* set up G protection */ + if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && + info->control.use_cts_prot && + rate->flags & IEEE80211_RATE_ERP_G) + rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; + } +} + + +static void rate_control_fill_sta_table(struct ieee80211_sta *sta, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + struct ieee80211_sta_rates *ratetbl = NULL; + int i; + + if (sta && !info->control.skip_table) + ratetbl = rcu_dereference(sta->rates); + + /* Fill remaining rate slots with data from the sta rate table. */ + max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); + for (i = 0; i < max_rates; i++) { + if (i < ARRAY_SIZE(info->control.rates) && + info->control.rates[i].idx >= 0 && + info->control.rates[i].count) { + if (rates != info->control.rates) + rates[i] = info->control.rates[i]; + } else if (ratetbl) { + rates[i].idx = ratetbl->rate[i].idx; + rates[i].flags = ratetbl->rate[i].flags; + if (info->control.use_rts) + rates[i].count = ratetbl->rate[i].count_rts; + else if (info->control.use_cts_prot) + rates[i].count = ratetbl->rate[i].count_cts; + else + rates[i].count = ratetbl->rate[i].count; + } else { + rates[i].idx = -1; + rates[i].count = 0; + } + + if (rates[i].idx < 0 || !rates[i].count) + break; + } +} + +static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_info *info, + struct ieee80211_tx_rate *rates, + int max_rates) +{ + enum nl80211_chan_width chan_width; + u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; + bool has_mcs_mask; + u32 mask; + int i; + + /* + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. + */ + mask = sdata->rc_rateidx_mask[info->band]; + has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; + if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) + return; + + if (has_mcs_mask) + memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], + sizeof(mcs_mask)); + else + memset(mcs_mask, 0xff, sizeof(mcs_mask)); + + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->supp_rates[info->band]; + for (i = 0; i < sizeof(mcs_mask); i++) + mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; + } + + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. 
+ */ + chan_width = sdata->vif.bss_conf.chandef.width; + for (i = 0; i < max_rates; i++) { + /* Skip invalid rates */ + if (rates[i].idx < 0) + break; + + rate_idx_match_mask(&rates[i], sband, mask, chan_width, + mcs_mask); + } +} + +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff *skb, + struct ieee80211_tx_rate *dest, + int max_rates) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_supported_band *sband; + + rate_control_fill_sta_table(sta, info, dest, max_rates); + + if (!vif) + return; + + sdata = vif_to_sdata(vif); + sband = sdata->local->hw.wiphy->bands[info->band]; + + if (ieee80211_is_data(hdr->frame_control)) + rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates); + + if (dest[0].idx < 0) + __rate_control_send_low(&sdata->local->hw, sband, sta, info); + + if (sta) + rate_fixup_ratelist(vif, sband, info, dest, max_rates); +} +EXPORT_SYMBOL(ieee80211_get_tx_rates); + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; - u32 mask; - u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; @@ -454,40 +676,27 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - /* - * Try to enforce the rateidx mask the user wanted. skip this if the - * default mask (allow all rates) is used to save some processing for - * the common case. - */ - mask = sdata->rc_rateidx_mask[info->band]; - if (mask != (1 << txrc->sband->n_bitrates) - 1 || txrc->rate_idx_mcs_mask) { - if (txrc->rate_idx_mcs_mask) - memcpy(mcs_mask, txrc->rate_idx_mcs_mask, sizeof(mcs_mask)); - else - memset(mcs_mask, 0xff, sizeof(mcs_mask)); - - if (sta) { - /* Filter out rates that the STA does not support */ - mask &= sta->sta.supp_rates[info->band]; - for (i = 0; i < sizeof(mcs_mask); i++) - mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i]; - } - /* - * Make sure the rate index selected for each TX rate is - * included in the configured mask and change the rate indexes - * if needed. 
- */ - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - /* Skip invalid rates */ - if (info->control.rates[i].idx < 0) - break; - rate_idx_match_mask(&info->control.rates[i], txrc, - mask, mcs_mask); - } - } + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) + return; + + ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, + info->control.rates, + ARRAY_SIZE(info->control.rates)); +} - BUG_ON(info->control.rates[0].idx < 0); +int rate_control_set_rates(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_sta_rates *rates) +{ + struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates); + + rcu_assign_pointer(pubsta->rates, rates); + if (old) + kfree_rcu(old, rcu_head); + + return 0; } +EXPORT_SYMBOL(rate_control_set_rates); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6ca857f8f42..4a5fbf83cd1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); /* assume HW handles this */ - if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) + if (tx->rate.flags & IEEE80211_TX_RC_MCS) return 0; /* uh huh? */ - if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) + if (WARN_ON_ONCE(tx->rate.idx < 0)) return 0; sband = local->hw.wiphy->bands[info->band]; - txrate = &sband->bitrates[info->control.rates[0].idx]; + txrate = &sband->bitrates[tx->rate.idx]; erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -617,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (void *)tx->skb->data; struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate; - int i; u32 len; - bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; + struct ieee80211_sta_rates *ratetbl = NULL; bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -653,10 +651,10 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { - txrc.rts = rts = true; + txrc.rts = true; } - info->control.use_rts = rts; + info->control.use_rts = txrc.rts; info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; /* @@ -668,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (tx->sdata->vif.bss_conf.use_short_preamble && (ieee80211_is_data(hdr->frame_control) || (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) - txrc.short_preamble = short_preamble = true; + txrc.short_preamble = true; + + info->control.short_preamble = txrc.short_preamble; if (tx->sta) assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); @@ -692,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ rate_control_get_rate(tx->sdata, tx->sta, &txrc); - if (unlikely(info->control.rates[0].idx < 0)) - return TX_DROP; + if (tx->sta && !info->control.skip_table) + ratetbl = rcu_dereference(tx->sta->sta.rates); + + if (unlikely(info->control.rates[0].idx < 0)) { + if (ratetbl) { + struct ieee80211_tx_rate rate = { + .idx = ratetbl->rate[0].idx, + .flags = ratetbl->rate[0].flags, + .count = ratetbl->rate[0].count + }; + + if (ratetbl->rate[0].idx < 0) + return TX_DROP; + + tx->rate = rate; + } else { + return TX_DROP; + } + } else { + tx->rate = info->control.rates[0]; + } if (txrc.reported_rate.idx < 0) { - txrc.reported_rate = 
info->control.rates[0]; + txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_data(hdr->frame_control)) tx->sta->last_tx_rate = txrc.reported_rate; } else if (tx->sta) tx->sta->last_tx_rate = txrc.reported_rate; + if (ratetbl) + return TX_CONTINUE; + if (unlikely(!info->control.rates[0].count)) info->control.rates[0].count = 1; @@ -709,102 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) (info->flags & IEEE80211_TX_CTL_NO_ACK))) info->control.rates[0].count = 1; - if (is_multicast_ether_addr(hdr->addr1)) { - /* - * XXX: verify the rate is in the basic rateset - */ - return TX_CONTINUE; - } - - /* - * Set up the RTS/CTS rate as the fastest basic rate - * that is not faster than the data rate unless there - * is no basic rate slower than the data rate, in which - * case we pick the slowest basic rate - * - * XXX: Should this check all retry rates? - */ - if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { - u32 basic_rates = tx->sdata->vif.bss_conf.basic_rates; - s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; - - rate = &sband->bitrates[info->control.rates[0].idx]; - - for (i = 0; i < sband->n_bitrates; i++) { - /* must be a basic rate */ - if (!(basic_rates & BIT(i))) - continue; - /* must not be faster than the data rate */ - if (sband->bitrates[i].bitrate > rate->bitrate) - continue; - /* maximum */ - if (sband->bitrates[baserate].bitrate < - sband->bitrates[i].bitrate) - baserate = i; - } - - info->control.rts_cts_rate_idx = baserate; - } - - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - struct ieee80211_tx_rate *rc_rate = &info->control.rates[i]; - - /* - * make sure there's no valid rate following - * an invalid one, just in case drivers don't - * take the API seriously to stop at -1. - */ - if (inval) { - rc_rate->idx = -1; - continue; - } - if (rc_rate->idx < 0) { - inval = true; - continue; - } - - /* - * For now assume MCS is already set up correctly, this - * needs to be fixed. - */ - if (rc_rate->flags & IEEE80211_TX_RC_MCS) { - WARN_ON(rc_rate->idx > 76); - - if (!(rc_rate->flags & IEEE80211_TX_RC_USE_RTS_CTS) && - tx->sdata->vif.bss_conf.use_cts_prot) - rc_rate->flags |= - IEEE80211_TX_RC_USE_CTS_PROTECT; - continue; - } - - if (rc_rate->flags & IEEE80211_TX_RC_VHT_MCS) { - WARN_ON(ieee80211_rate_get_vht_mcs(rc_rate) > 9); - continue; - } - - /* set up RTS protection if desired */ - if (rts) - rc_rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; - - /* RC is busted */ - if (WARN_ON_ONCE(rc_rate->idx >= sband->n_bitrates)) { - rc_rate->idx = -1; - continue; - } - - rate = &sband->bitrates[rc_rate->idx]; - - /* set up short preamble */ - if (short_preamble && - rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - rc_rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - - /* set up G protection */ - if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && - rate->flags & IEEE80211_RATE_ERP_G) - rc_rate->flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; - } - return TX_CONTINUE; } -- cgit v1.2.3-70-g09d2 From 9ecd1a75d977e2e8c48139c7d3efed183f898d94 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 22 Apr 2013 02:20:41 +0000 Subject: xen-netfront: reduce gso_max_size to account for max TCP header The maximum packet including header that can be handled by netfront / netback wire format is 65535. Reduce gso_max_size accordingly. Drop skb and print warning when skb->len > 65535. This can 1) save the effort to send malformed packet to netback, 2) help spotting misconfiguration of netfront in the future. 
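As a back-of-the-envelope check of the new limit (illustrative only; XEN_NETIF_MAX_TX_SIZE is taken from the patch below, while the MAX_TCP_HEADER value used here is an assumed typical figure, since the real kernel macro is configuration dependent):

#include <stdio.h>

#define XEN_NETIF_MAX_TX_SIZE 0xFFFF       /* 65535, wire-format ceiling (from this patch) */
#define MAX_TCP_HEADER        (128 + 48)   /* assumption: ballpark value, config dependent */

int main(void)
{
	/* gso_max_size advertised by netfront after this change */
	printf("gso_max_size = %d\n", XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
	return 0;
}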
Signed-off-by: Wei Liu Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 17 +++++++++++++++-- include/xen/interface/io/netif.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1bb2e2072a6..1db10141506 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -547,6 +547,16 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len = skb_headlen(skb); unsigned long flags; + /* If skb->len is too big for wire format, drop skb and alert + * user about misconfiguration. + */ + if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) { + net_alert_ratelimited( + "xennet: skb->len = %u, too big for wire format\n", + skb->len); + goto drop; + } + slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) + xennet_count_skb_frag_slots(skb); if (unlikely(slots > MAX_SKB_FRAGS + 1)) { @@ -1058,7 +1068,8 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? + XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1362,6 +1373,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); SET_NETDEV_DEV(netdev, &dev->dev); + netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); + np->netdev = netdev; netif_carrier_off(netdev); diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index 9dfc1200098..58fadcac33a 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -47,6 +47,7 @@ #define _XEN_NETTXF_extra_info (3) #define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct xen_netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ -- cgit v1.2.3-70-g09d2 From 2810e5b9a7731ca5fce22bfbe12c96e16ac44b6f Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 22 Apr 2013 02:20:42 +0000 Subject: xen-netback: coalesce slots in TX path and fix regressions This patch tries to coalesce tx requests when constructing grant copy structures. It enables netback to deal with situation when frontend's MAX_SKB_FRAGS is larger than backend's MAX_SKB_FRAGS. With the help of coalescing, this patch tries to address two regressions avoid reopening the security hole in XSA-39. Regression 1. The reduction of the number of supported ring entries (slots) per packet (from 18 to 17). This regression has been around for some time but remains unnoticed until XSA-39 security fix. This is fixed by coalescing slots. Regression 2. The XSA-39 security fix turning "too many frags" errors from just dropping the packet to a fatal error and disabling the VIF. This is fixed by coalescing slots (handling 18 slots when backend's MAX_SKB_FRAGS is 17) which rules out false positive (using 18 slots is legit) and dropping packets using 19 to `max_skb_slots` slots. To avoid reopening security hole in XSA-39, frontend sending packet using more than max_skb_slots is considered malicious. The behavior of netback for packet is thus: 1-18 slots: valid 19-max_skb_slots slots: drop and respond with an error max_skb_slots+ slots: fatal error max_skb_slots is configurable by admin, default value is 20. 
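The resulting per-packet policy can be summarised as a small classification step (standalone userspace sketch mirroring the list above, not netback code; the exact cut-offs depend on how the head slot is counted, so treat the boundaries as illustrative):

#include <stdio.h>

#define XEN_NETIF_NR_SLOTS_MIN 18   /* historical MAX_SKB_FRAGS, from this patch */

enum verdict { SLOTS_VALID, SLOTS_DROP, SLOTS_FATAL };

static enum verdict classify_slots(unsigned int slots, unsigned int max_skb_slots)
{
	if (slots <= XEN_NETIF_NR_SLOTS_MIN)
		return SLOTS_VALID;      /* 1-18 slots: accepted                    */
	if (slots <= max_skb_slots)
		return SLOTS_DROP;       /* 19-max_skb_slots: drop, respond with error */
	return SLOTS_FATAL;              /* beyond that: frontend treated as malicious */
}

int main(void)
{
	unsigned int n;

	for (n = 17; n <= 22; n++)
		printf("%2u slots -> %d\n", n, classify_slots(n, 20));
	return 0;
}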
Also change variable name from "frags" to "slots" in netbk_count_requests. Please note that RX path still has dependency on MAX_SKB_FRAGS. This will be fixed with separate patch. Signed-off-by: Wei Liu Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 275 +++++++++++++++++++++++++++++++------- include/xen/interface/io/netif.h | 18 +++ 2 files changed, 242 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 9f718440426..d9292c59789 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -47,11 +47,25 @@ #include #include +/* + * This is the maximum slots a skb can have. If a guest sends a skb + * which exceeds this limit it is considered malicious. + */ +#define MAX_SKB_SLOTS_DEFAULT 20 +static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT; +module_param(max_skb_slots, uint, 0444); + +typedef unsigned int pending_ring_idx_t; +#define INVALID_PENDING_RING_IDX (~0U) + struct pending_tx_info { - struct xen_netif_tx_request req; + struct xen_netif_tx_request req; /* coalesced tx request */ struct xenvif *vif; + pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX + * if it is head of one or more tx + * reqs + */ }; -typedef unsigned int pending_ring_idx_t; struct netbk_rx_meta { int id; @@ -102,7 +116,11 @@ struct xen_netbk { atomic_t netfront_count; struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; + /* Coalescing tx requests before copying makes number of grant + * copy ops greater or equal to number of slots required. In + * worst case a tx request consumes 2 gnttab_copy. + */ + struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; u16 pending_ring[MAX_PENDING_REQS]; @@ -118,6 +136,16 @@ struct xen_netbk { static struct xen_netbk *xen_netbk; static int xen_netbk_group_nr; +/* + * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of + * one or more merged tx requests, otherwise it is the continuation of + * previous tx request. + */ +static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx) +{ + return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; +} + void xen_netbk_add_xenvif(struct xenvif *vif) { int i; @@ -250,6 +278,7 @@ static int max_required_rx_slots(struct xenvif *vif) { int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (vif->can_sg || vif->gso || vif->gso_prefix) max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ @@ -657,6 +686,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) __skb_queue_tail(&rxq, skb); /* Filled the batch queue? 
*/ + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) break; } @@ -898,47 +928,78 @@ static void netbk_fatal_tx_err(struct xenvif *vif) static int netbk_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, + RING_IDX first_idx, struct xen_netif_tx_request *txp, int work_to_do) { RING_IDX cons = vif->tx.req_cons; - int frags = 0; + int slots = 0; + int drop_err = 0; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { - if (frags >= work_to_do) { - netdev_err(vif->dev, "Need more frags\n"); + if (slots >= work_to_do) { + netdev_err(vif->dev, + "Asked for %d slots but exceeds this limit\n", + work_to_do); netbk_fatal_tx_err(vif); return -ENODATA; } - if (unlikely(frags >= MAX_SKB_FRAGS)) { - netdev_err(vif->dev, "Too many frags\n"); + /* This guest is really using too many slots and + * considered malicious. + */ + if (unlikely(slots >= max_skb_slots)) { + netdev_err(vif->dev, + "Malicious frontend using %d slots, threshold %u\n", + slots, max_skb_slots); netbk_fatal_tx_err(vif); return -E2BIG; } - memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), + /* Xen network protocol had implicit dependency on + * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the + * historical MAX_SKB_FRAGS value 18 to honor the same + * behavior as before. Any packet using more than 18 + * slots but less than max_skb_slots slots is dropped + */ + if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) { + if (net_ratelimit()) + netdev_dbg(vif->dev, + "Too many slots (%d) exceeding limit (%d), dropping packet\n", + slots, XEN_NETIF_NR_SLOTS_MIN); + drop_err = -E2BIG; + } + + memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); if (txp->size > first->size) { - netdev_err(vif->dev, "Frag is bigger than frame.\n"); + netdev_err(vif->dev, + "Invalid tx request, slot size %u > remaining size %u\n", + txp->size, first->size); netbk_fatal_tx_err(vif); return -EIO; } first->size -= txp->size; - frags++; + slots++; if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { - netdev_err(vif->dev, "txp->offset: %x, size: %u\n", + netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", txp->offset, txp->size); netbk_fatal_tx_err(vif); return -EINVAL; } } while ((txp++)->flags & XEN_NETTXF_more_data); - return frags; + + if (drop_err) { + netbk_tx_err(vif, first, first_idx + slots); + return drop_err; + } + + return slots; } static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk, @@ -962,48 +1023,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = *((u16 *)skb->data); - int i, start; + u16 head_idx = 0; + int slot, start; + struct page *page; + pending_ring_idx_t index, start_idx = 0; + uint16_t dst_offset; + unsigned int nr_slots; + struct pending_tx_info *first = NULL; + + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN. + */ + nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); - for (i = start; i < shinfo->nr_frags; i++, txp++) { - struct page *page; - pending_ring_idx_t index; + /* Coalesce tx requests, at this point the packet passed in + * should be <= 64K. Any packets larger than 64K have been + * handled in netbk_count_requests(). 
+ */ + for (shinfo->nr_frags = slot = start; slot < nr_slots; + shinfo->nr_frags++) { struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; - index = pending_index(netbk->pending_cons++); - pending_idx = netbk->pending_ring[index]; - page = xen_netbk_alloc_page(netbk, pending_idx); + page = alloc_page(GFP_KERNEL|__GFP_COLD); if (!page) goto err; - gop->source.u.ref = txp->gref; - gop->source.domid = vif->domid; - gop->source.offset = txp->offset; - - gop->dest.u.gmfn = virt_to_mfn(page_address(page)); - gop->dest.domid = DOMID_SELF; - gop->dest.offset = txp->offset; - - gop->len = txp->size; - gop->flags = GNTCOPY_source_gref; + dst_offset = 0; + first = NULL; + while (dst_offset < PAGE_SIZE && slot < nr_slots) { + gop->flags = GNTCOPY_source_gref; + + gop->source.u.ref = txp->gref; + gop->source.domid = vif->domid; + gop->source.offset = txp->offset; + + gop->dest.domid = DOMID_SELF; + + gop->dest.offset = dst_offset; + gop->dest.u.gmfn = virt_to_mfn(page_address(page)); + + if (dst_offset + txp->size > PAGE_SIZE) { + /* This page can only merge a portion + * of tx request. Do not increment any + * pointer / counter here. The txp + * will be dealt with in future + * rounds, eventually hitting the + * `else` branch. + */ + gop->len = PAGE_SIZE - dst_offset; + txp->offset += gop->len; + txp->size -= gop->len; + dst_offset += gop->len; /* quit loop */ + } else { + /* This tx request can be merged in the page */ + gop->len = txp->size; + dst_offset += gop->len; + + index = pending_index(netbk->pending_cons++); + + pending_idx = netbk->pending_ring[index]; + + memcpy(&pending_tx_info[pending_idx].req, txp, + sizeof(*txp)); + xenvif_get(vif); + + pending_tx_info[pending_idx].vif = vif; + + /* Poison these fields, corresponding + * fields for head tx req will be set + * to correct values after the loop. + */ + netbk->mmap_pages[pending_idx] = (void *)(~0UL); + pending_tx_info[pending_idx].head = + INVALID_PENDING_RING_IDX; + + if (!first) { + first = &pending_tx_info[pending_idx]; + start_idx = index; + head_idx = pending_idx; + } + + txp++; + slot++; + } - gop++; + gop++; + } - memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); - xenvif_get(vif); - pending_tx_info[pending_idx].vif = vif; - frag_set_pending_idx(&frags[i], pending_idx); + first->req.offset = 0; + first->req.size = dst_offset; + first->head = start_idx; + set_page_ext(page, netbk, head_idx); + netbk->mmap_pages[head_idx] = page; + frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); } + BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); + return gop; err: /* Unwind, freeing all pages and sending error responses. */ - while (i-- > start) { - xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]), - XEN_NETIF_RSP_ERROR); + while (shinfo->nr_frags-- > start) { + xen_netbk_idx_release(netbk, + frag_get_pending_idx(&frags[shinfo->nr_frags]), + XEN_NETIF_RSP_ERROR); } /* The head too, if necessary. */ if (start) @@ -1019,8 +1146,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, struct gnttab_copy *gop = *gopp; u16 pending_idx = *((u16 *)skb->data); struct skb_shared_info *shinfo = skb_shinfo(skb); + struct pending_tx_info *tx_info; int nr_frags = shinfo->nr_frags; int i, err, start; + u16 peek; /* peek into next tx request */ /* Check status of header. 
*/ err = gop->status; @@ -1032,11 +1161,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, for (i = start; i < nr_frags; i++) { int j, newerr; + pending_ring_idx_t head; pending_idx = frag_get_pending_idx(&shinfo->frags[i]); + tx_info = &netbk->pending_tx_info[pending_idx]; + head = tx_info->head; /* Check error status: if okay then remember grant handle. */ - newerr = (++gop)->status; + do { + newerr = (++gop)->status; + if (newerr) + break; + peek = netbk->pending_ring[pending_index(++head)]; + } while (!pending_tx_is_head(netbk, peek)); + if (likely(!newerr)) { /* Had a previous error? Invalidate this fragment. */ if (unlikely(err)) @@ -1256,11 +1394,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) struct sk_buff *skb; int ret; - while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; + struct xen_netif_tx_request txfrags[max_skb_slots]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; @@ -1321,7 +1460,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) continue; } - ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); + ret = netbk_count_requests(vif, &txreq, idx, + txfrags, work_to_do); if (unlikely(ret < 0)) continue; @@ -1348,7 +1488,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) pending_idx = netbk->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && - ret < MAX_SKB_FRAGS) ? + ret < XEN_NETIF_NR_SLOTS_MIN) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, @@ -1398,6 +1538,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) memcpy(&netbk->pending_tx_info[pending_idx].req, &txreq, sizeof(txreq)); netbk->pending_tx_info[pending_idx].vif = vif; + netbk->pending_tx_info[pending_idx].head = index; *((u16 *)skb->data) = pending_idx; __skb_put(skb, data_len); @@ -1528,7 +1669,10 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, { struct xenvif *vif; struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; + pending_ring_idx_t head; + u16 peek; /* peek into next tx request */ + + BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL)); /* Already complete? */ if (netbk->mmap_pages[pending_idx] == NULL) @@ -1537,19 +1681,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, pending_tx_info = &netbk->pending_tx_info[pending_idx]; vif = pending_tx_info->vif; + head = pending_tx_info->head; - make_tx_response(vif, &pending_tx_info->req, status); + BUG_ON(!pending_tx_is_head(netbk, head)); + BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx); - index = pending_index(netbk->pending_prod++); - netbk->pending_ring[index] = pending_idx; + do { + pending_ring_idx_t index; + pending_ring_idx_t idx = pending_index(head); + u16 info_idx = netbk->pending_ring[idx]; - xenvif_put(vif); + pending_tx_info = &netbk->pending_tx_info[info_idx]; + make_tx_response(vif, &pending_tx_info->req, status); + + /* Setting any number other than + * INVALID_PENDING_RING_IDX indicates this slot is + * starting a new packet / ending a previous packet. 
+ */ + pending_tx_info->head = 0; + + index = pending_index(netbk->pending_prod++); + netbk->pending_ring[index] = netbk->pending_ring[info_idx]; - netbk->mmap_pages[pending_idx]->mapping = NULL; + xenvif_put(vif); + + peek = netbk->pending_ring[pending_index(++head)]; + + } while (!pending_tx_is_head(netbk, peek)); + + netbk->mmap_pages[pending_idx]->mapping = 0; put_page(netbk->mmap_pages[pending_idx]); netbk->mmap_pages[pending_idx] = NULL; } + static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st) @@ -1602,8 +1767,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk) static inline int tx_work_todo(struct xen_netbk *netbk) { - if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && - !list_empty(&netbk->net_schedule_list)) + if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) return 1; return 0; @@ -1686,6 +1852,13 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; + if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) { + printk(KERN_INFO + "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n", + max_skb_slots, XEN_NETIF_NR_SLOTS_MIN); + max_skb_slots = XEN_NETIF_NR_SLOTS_MIN; + } + xen_netbk_group_nr = num_online_cpus(); xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); if (!xen_netbk) diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index 58fadcac33a..3ef3fe05ee9 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -12,6 +12,24 @@ #include #include +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + /* * Notifications after enqueuing any type of message should be conditional on * the appropriate req_event or rsp_event field in the shared ring. -- cgit v1.2.3-70-g09d2 From 3fb62c5d3fc1821f50c6003e582713857a520f6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2013 14:29:25 -0700 Subject: net: remove a stale comment for dl_next dl_next member in struct request_sock doesn't need to be first. We expect to insert a "struct common_sock" or a subset of it, so this claim had to be verified. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9069e65c1c5..59795e42c8b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -48,7 +48,7 @@ extern int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); /* struct request_sock - mini sock to represent a connection request */ struct request_sock { - struct request_sock *dl_next; /* Must be first member! 
*/ + struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ -- cgit v1.2.3-70-g09d2 From 3e3251b3f289528732edab386ddf73ac428359b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Apr 2013 21:59:37 +0000 Subject: net: sctp: minor: remove dead code from sctp_packet struct sctp_packet is currently embedded into sctp_transport or sits on the stack as 'singleton' in sctp_outq_flush(). Therefore, its member 'malloced' is always 0, thus a kfree() is never called. Because of that, we can just remove this code. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 +-- net/sctp/output.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 64d469845f2..1bd4c4144fe 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -714,8 +714,7 @@ struct sctp_packet { has_sack:1, /* This packet contains a SACK chunk. */ has_auth:1, /* This packet contains an AUTH chunk */ has_data:1, /* This packet contains at least 1 DATA chunk */ - ipfragok:1, /* So let ip fragment this packet */ - malloced:1; /* Is it malloced? */ + ipfragok:1; /* So let ip fragment this packet */ }; struct sctp_packet *sctp_packet_init(struct sctp_packet *, diff --git a/net/sctp/output.c b/net/sctp/output.c index f5200a2ad85..bbef4a7a9b5 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->overhead = overhead; sctp_packet_reset(packet); packet->vtag = 0; - packet->malloced = 0; + return packet; } @@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet) list_del_init(&chunk->list); sctp_chunk_free(chunk); } - - if (packet->malloced) - kfree(packet); } /* This routine tries to append the chunk to the offered packet. 
If adding -- cgit v1.2.3-70-g09d2 From 0a925864c1038a78fd1cc9b048d9a2b1ae04b63e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 17 Apr 2013 23:50:49 +0300 Subject: ipvs: fix sparse warnings for some parameters Some service fields are in network order: - netmask: used once in network order and also as prefix len for IPv6 - port Other parameters are in host order: - struct ip_vs_flags: flags and mask moved between user and kernel only - sync state: moved between user and kernel only - syncid: sent over network as single octet Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 8 ++++---- include/uapi/linux/ip_vs.h | 4 ++-- net/netfilter/ipvs/ip_vs_core.c | 3 ++- net/netfilter/ipvs/ip_vs_ctl.c | 40 ++++++++++++++++++++++++---------------- 4 files changed, 32 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f9f5b057b48..4c062ccff9a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -678,7 +678,7 @@ struct ip_vs_service_user_kern { u16 af; u16 protocol; union nf_inet_addr addr; /* virtual ip address */ - u16 port; + __be16 port; u32 fwmark; /* firwall mark of service */ /* virtual service options */ @@ -686,14 +686,14 @@ struct ip_vs_service_user_kern { char *pe_name; unsigned int flags; /* virtual service flags */ unsigned int timeout; /* persistent timeout in sec */ - u32 netmask; /* persistent netmask */ + __be32 netmask; /* persistent netmask or plen */ }; struct ip_vs_dest_user_kern { /* destination server address */ union nf_inet_addr addr; - u16 port; + __be16 port; /* real server options */ unsigned int conn_flags; /* connection flags */ @@ -721,7 +721,7 @@ struct ip_vs_service { __u32 fwmark; /* firewall mark of the service */ unsigned int flags; /* service status flags */ unsigned int timeout; /* persistent timeout in ticks */ - __be32 netmask; /* grouping granularity */ + __be32 netmask; /* grouping granularity, mask/plen */ struct net *net; struct list_head destinations; /* real server d-linked list */ diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 8a2d438dc49..a24537725e8 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -280,8 +280,8 @@ struct ip_vs_daemon_user { #define IPVS_GENL_VERSION 0x1 struct ip_vs_flags { - __be32 flags; - __be32 mask; + __u32 flags; + __u32 mask; }; /* Generic Netlink command attributes */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f26fe3353a3..a0d7bd34277 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -235,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); + ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + (__force __u32) svc->netmask); else #endif snet.ip = iph->saddr.ip & svc->netmask; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 64075a775e3..5b142fb1648 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1164,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out_err; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out_err; 
+ } } #endif @@ -1277,9 +1281,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { - ret = -EINVAL; - goto out; + if (u->af == AF_INET6) { + __u32 plen = (__force __u32) u->netmask; + + if (plen < 1 || plen > 128) { + ret = -EINVAL; + goto out; + } } #endif @@ -2892,7 +2900,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } else { if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || - nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) + nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) goto nla_put_failure; } @@ -2902,7 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || - nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) + nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) goto nla_put_failure; @@ -3015,7 +3023,7 @@ static int ip_vs_genl_parse_service(struct net *net, } else { usvc->protocol = nla_get_u16(nla_protocol); nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); - usvc->port = nla_get_u16(nla_port); + usvc->port = nla_get_be16(nla_port); usvc->fwmark = 0; } @@ -3055,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->sched_name = nla_data(nla_sched); usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; usvc->timeout = nla_get_u32(nla_timeout); - usvc->netmask = nla_get_u32(nla_netmask); + usvc->netmask = nla_get_be32(nla_netmask); } return 0; @@ -3081,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) return -EMSGSIZE; if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || - nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || + nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, (atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK)) || @@ -3190,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, memset(udest, 0, sizeof(*udest)); nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); - udest->port = nla_get_u16(nla_port); + udest->port = nla_get_be16(nla_port); /* If a full entry was requested, check for the additional fields */ if (full_entry) { @@ -3215,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, return 0; } -static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid) +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid) { struct nlattr *nl_daemon; @@ -3237,8 +3245,8 @@ nla_put_failure: return -EMSGSIZE; } -static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, - const char *mcast_ifn, __be32 syncid, +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, + const char *mcast_ifn, __u32 syncid, struct netlink_callback *cb) { void *hdr; -- cgit v1.2.3-70-g09d2 From 26ee65e680f4a2291f6258e11beceae0ad4eeba3 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Mon, 22 Apr 2013 23:57:01 +0000 Subject: caif: Remove my bouncing email address. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove my soon bouncing email address. Also remove the "Contact:" line in file header. The MAINTAINERS file is a better place to find the contact person anyway. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 1 - drivers/net/caif/caif_serial.c | 4 ++-- drivers/net/caif/caif_spi.c | 1 - drivers/net/caif/caif_spi_slave.c | 1 - include/net/caif/caif_dev.h | 2 +- include/net/caif/caif_device.h | 2 +- include/net/caif/caif_hsi.h | 1 - include/net/caif/caif_layer.h | 2 +- include/net/caif/cfcnfg.h | 2 +- include/net/caif/cfctrl.h | 2 +- include/net/caif/cffrml.h | 2 +- include/net/caif/cfmuxl.h | 2 +- include/net/caif/cfpkt.h | 2 +- include/net/caif/cfserl.h | 2 +- include/net/caif/cfsrvl.h | 2 +- include/uapi/linux/caif/caif_socket.h | 2 +- include/uapi/linux/caif/if_caif.h | 2 +- net/caif/caif_dev.c | 2 +- net/caif/caif_socket.c | 2 +- net/caif/caif_usb.c | 2 +- net/caif/cfcnfg.c | 2 +- net/caif/cfctrl.c | 2 +- net/caif/cfdbgl.c | 2 +- net/caif/cfdgml.c | 2 +- net/caif/cffrml.c | 2 +- net/caif/cfmuxl.c | 2 +- net/caif/cfpkt_skbuff.c | 2 +- net/caif/cfrfml.c | 2 +- net/caif/cfserl.c | 2 +- net/caif/cfsrvl.c | 2 +- net/caif/cfutill.c | 2 +- net/caif/cfveil.c | 2 +- net/caif/cfvidl.c | 2 +- net/caif/chnl_net.c | 2 +- 34 files changed, 31 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 0def8b3106f..77f50749acb 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / daniel.martensson@stericsson.com * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com * License terms: GNU General Public License (GPL) version 2. diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 666891a9a24..e56b56c08b2 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -21,7 +21,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sjur Brendeland"); +MODULE_AUTHOR("Sjur Brendeland"); MODULE_DESCRIPTION("CAIF serial device TTY line discipline"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_CAIF); diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index b71ce9bf0af..c2cb852e13b 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2. */ diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c index e139e133fc7..98f03663aa2 100644 --- a/drivers/net/caif/caif_spi_slave.c +++ b/drivers/net/caif/caif_spi_slave.c @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2. 
*/ diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index ef2dd9438bb..028b754ae9b 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h index d02f044adb8..d6e3c4267c8 100644 --- a/include/net/caif/caif_device.h +++ b/include/net/caif/caif_device.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index bcb9cc3ce98..4795e817afe 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / daniel.martensson@stericsson.com * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com * License terms: GNU General Public License (GPL) version 2 diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f3a39125f9..94e5ed64dc6 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 90b4ff8bad8..70bfd017581 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h index 9e5425b4a1d..f2ae33d23ba 100644 --- a/include/net/caif/cfctrl.h +++ b/include/net/caif/cfctrl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h index afac1a48cce..a06e33fbaa8 100644 --- a/include/net/caif/cffrml.h +++ b/include/net/caif/cffrml.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h index 5847a196b8a..752999572f2 100644 --- a/include/net/caif/cfmuxl.h +++ b/include/net/caif/cfmuxl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 83a89ba3005..1c1ad46250d 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfserl.h 
b/include/net/caif/cfserl.h index f121299a342..b5b020f3c72 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h index 0f590524184..cd47705c2cc 100644 --- a/include/net/caif/cfsrvl.h +++ b/include/net/caif/cfsrvl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h index 3f3bac6af7b..586e9f98184 100644 --- a/include/uapi/linux/caif/caif_socket.h +++ b/include/uapi/linux/caif/caif_socket.h @@ -1,7 +1,7 @@ /* linux/caif_socket.h * CAIF Definitions for CAIF socket and network layer * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h index 5e7eed4edf5..7618aabe8c6 100644 --- a/include/uapi/linux/caif/if_caif.h +++ b/include/uapi/linux/caif/if_caif.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index df6d56d8689..1f9ece1a9c3 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -1,7 +1,7 @@ /* * CAIF Interface registration. * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * * Borrowed heavily from file: pn_dev.c. 
Thanks to Remi Denis-Courmont diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 630b8be6e74..05a41c7ec30 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index d76278d644b..942e00a425f 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -1,7 +1,7 @@ /* * CAIF USB handler * Copyright (C) ST-Ericsson AB 2011 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * */ diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 246ac3aa8de..fa39fc29870 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 9cd057c59c5..2bd4b58f437 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 2914659eb9b..7aae0b56829 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index a63f4a5f5af..3bdddb32d55 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 204c5e226a6..8bc7caa28e6 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -2,7 +2,7 @@ * CAIF Framing Layer. 
* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 154d9f8f964..8c5d6386319 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index e8f9c149504..6493351f39c 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index db51830c858..61d7617d924 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 147c232b128..ce60f06d76d 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 95f7f5ea30e..353f793d1b3 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 86d2dadb4b7..1728fa4471c 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 910ab0661f6..262224581ef 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index a8e2a2d758a..b3b110e8a35 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 26a4e4e3a76..89586c05a47 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com + * Authors: Sjur Brendeland * Daniel Martensson / Daniel.Martensson@stericsson.com * License terms: GNU General Public License (GPL) version 2 */ -- cgit v1.2.3-70-g09d2 From d998735f443427c1530cac5eeda0a45c8cb60a57 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 23 Apr 2013 06:06:47 +0000 Subject: net/mlx4_core: Add 
timestamping device capability Add new device capability for timestamping support and query FW to retrieve it. Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 7 ++++++- include/linux/mlx4/device.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index ab470d991ad..f1e70979b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -130,7 +130,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", [3] = "Device manage flow steering support", - [4] = "Automatic mac reassignment support" + [4] = "Automatic MAC reassignment support", + [5] = "Time stamping support" }; int i; @@ -444,6 +445,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c +#define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 @@ -560,6 +562,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1bc5a750b33..86ae260c2e5 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -152,7 +152,8 @@ enum { MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, - MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4 + MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, + MLX4_DEV_CAP_FLAG2_TS = 1LL << 5 }; enum { -- cgit v1.2.3-70-g09d2 From ddd8a6c12d7e494902a9435a9a7a543ef730b2d8 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 23 Apr 2013 06:06:48 +0000 Subject: net/mlx4_core: Read HCA frequency and map internal clock Read HCA frequency, read PCI clock bar and offset, map internal clock to PCI bar. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 11 ++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 57 +++++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +++- include/linux/mlx4/device.h | 1 + 5 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f1e70979b4c..6776c257bd3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1013,6 +1013,9 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) #define QUERY_FW_COMM_BASE_OFFSET 0x40 #define QUERY_FW_COMM_BAR_OFFSET 0x48 +#define QUERY_FW_CLOCK_OFFSET 0x50 +#define QUERY_FW_CLOCK_BAR 0x58 + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -1087,6 +1090,12 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) fw->comm_bar, fw->comm_base); mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); + MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); + MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); + fw->clock_bar = (fw->clock_bar >> 6) * 2; + mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", + fw->clock_bar, fw->clock_offset); + /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. @@ -1374,6 +1383,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 +#define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) @@ -1388,6 +1398,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, goto out; MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET); + MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); /* QPC/EEC/CQC/EQC/RDMARC attributes */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 151c2bb380a..fdf41665a05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -162,6 +162,7 @@ struct mlx4_init_hca_param { u64 global_caps; u16 log_mc_entry_sz; u16 log_mc_hash_sz; + u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */ u8 log_num_qps; u8 log_num_srqs; u8 log_num_cqs; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 16abde20e1f..e81840faa6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -513,6 +513,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; + dev->caps.hca_core_clock = hca_param.hca_core_clock; + memset(&dev_cap, 0, sizeof(dev_cap)); dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; err = mlx4_dev_cap(dev, &dev_cap); @@ -1226,8 +1228,31 @@ static void unmap_bf_area(struct mlx4_dev *dev) io_mapping_free(mlx4_priv(dev)->bf_mapping); } +static int map_internal_clock(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->clock_mapping = + ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + priv->fw.clock_offset, MLX4_CLOCK_SIZE); + + if (!priv->clock_mapping) + return -ENOMEM; + + return 0; +} + +static void unmap_internal_clock(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (priv->clock_mapping) + iounmap(priv->clock_mapping); +} + static void mlx4_close_hca(struct mlx4_dev *dev) { + unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); @@ 
-1445,6 +1470,37 @@ static int mlx4_init_hca(struct mlx4_dev *dev) mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } + /* + * If TS is supported by FW + * read HCA frequency by QUERY_HCA command + */ + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + memset(&init_hca, 0, sizeof(init_hca)); + err = mlx4_QUERY_HCA(dev, &init_hca); + if (err) { + mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + } else { + dev->caps.hca_core_clock = + init_hca.hca_core_clock; + } + + /* In case we got HCA frequency 0 - disable timestamping + * to avoid dividing by zero + */ + if (!dev->caps.hca_core_clock) { + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, + "HCA frequency is 0. Timestamping is not supported."); + } else if (map_internal_clock(dev)) { + /* + * Map internal clock, + * in case of failure disable timestamping + */ + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); + } + } } else { err = mlx4_init_slave(dev); if (err) { @@ -1478,6 +1534,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) return 0; unmap_bf: + unmap_internal_clock(dev); unmap_bf_area(dev); err_close: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 252f4ba7f32..0567f01938e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -87,7 +87,8 @@ enum { MLX4_HCR_SIZE = 0x0001c, MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, - MLX4_COMM_PAGESIZE = 0x1000 + MLX4_COMM_PAGESIZE = 0x1000, + MLX4_CLOCK_SIZE = 0x00008 }; enum { @@ -403,6 +404,7 @@ struct mlx4_fw { u64 clr_int_base; u64 catas_offset; u64 comm_base; + u64 clock_offset; struct mlx4_icm *fw_icm; struct mlx4_icm *aux_icm; u32 catas_size; @@ -410,6 +412,7 @@ struct mlx4_fw { u8 clr_int_bar; u8 catas_bar; u8 comm_bar; + u8 clock_bar; }; struct mlx4_comm { @@ -826,6 +829,7 @@ struct mlx4_priv { struct list_head bf_list; struct mutex bf_mutex; struct io_mapping *bf_mapping; + void __iomem *clock_mapping; int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 86ae260c2e5..e088290d979 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -445,6 +445,7 @@ struct mlx4_caps { u8 eqe_factor; u32 userspace_caps; /* userspace must be aware of these */ u32 function_caps; /* VFs must be aware of these */ + u16 hca_core_clock; }; struct mlx4_buf_list { -- cgit v1.2.3-70-g09d2 From ec693d47010e8302e61e0bdf3f47496c5610641a Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 23 Apr 2013 06:06:49 +0000 Subject: net/mlx4_en: Add HW timestamping (TS) support The patch allows enabling/disabling HW timestamping for incoming and/or outgoing packets. It adds and initializes all structs and callbacks needed by the kernel TS API. To enable/disable HW timestamping, the appropriate ioctl should be used. Currently only HWTSTAMP_FILTER_ALL/NONE and HWTSTAMP_TX_ON/OFF are supported. When enabling TS on the receive flow, VLAN stripping will be disabled. All relevant changes were also made in the RX/TX flows to honor the TS request and plant HW timestamps into the relevant structures. mlx4_ib was fixed to compile with the new mlx4_cq_alloc() signature. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S.
Miller --- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/net/ethernet/mellanox/mlx4/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx4/cq.c | 10 +- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 132 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_cq.c | 10 +- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 30 +++++ drivers/net/ethernet/mellanox/mlx4/en_main.c | 5 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 75 ++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_resources.c | 3 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 29 ++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 29 ++++- drivers/net/ethernet/mellanox/mlx4/main.c | 22 ++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 21 +++- include/linux/mlx4/cq.h | 16 +++ include/linux/mlx4/device.h | 6 +- 15 files changed, 375 insertions(+), 17 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx4/en_clock.c (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ae67df35dd4..73b3a713258 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -228,7 +228,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector vector = dev->eq_table[vector % ibdev->num_comp_vectors]; err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0); + cq->db.dma, &cq->mcq, vector, 0, 0); if (err) goto err_dbmap; diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile index 293127d28b3..3e9c70f15b4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Makefile +++ b/drivers/net/ethernet/mellanox/mlx4/Makefile @@ -6,5 +6,5 @@ mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \ - en_resources.o en_netdev.o en_selftest.o + en_resources.o en_netdev.o en_selftest.o en_clock.o mlx4_en-$(CONFIG_MLX4_EN_DCB) += en_dcb_nl.o diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0706623cfb9..004e4231af6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -240,9 +240,10 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) __mlx4_cq_free_icm(dev, cqn); } -int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - unsigned vector, int collapsed) +int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, + struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, + struct mlx4_cq *cq, unsigned vector, int collapsed, + int timestamp_en) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -276,6 +277,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, memset(cq_context, 0, sizeof *cq_context); cq_context->flags = cpu_to_be32(!!collapsed << 18); + if (timestamp_en) + cq_context->flags |= cpu_to_be32(1 << 19); + cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c new file mode 100644 index 00000000000..501c72f1fbe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include + +#include "mlx4_en.h" + +int mlx4_en_timestamp_config(struct net_device *dev, int tx_type, int rx_filter) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_free_resources(priv); + + en_warn(priv, "Changing Time Stamp configuration\n"); + + priv->hwtstamp_config.tx_type = tx_type; + priv->hwtstamp_config.rx_filter = rx_filter; + + if (rx_filter != HWTSTAMP_FILTER_NONE) + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + else + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; + + err = mlx4_en_alloc_resources(priv); + if (err) { + en_err(priv, "Failed reallocating port resources\n"); + goto out; + } + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + +out: + mutex_unlock(&mdev->state_lock); + netdev_features_change(dev); + return err; +} + +/* mlx4_en_read_clock - read raw cycle counter (to be used by time counter) + */ +static cycle_t mlx4_en_read_clock(const struct cyclecounter *tc) +{ + struct mlx4_en_dev *mdev = + container_of(tc, struct mlx4_en_dev, cycles); + struct mlx4_dev *dev = mdev->dev; + + return mlx4_read_clock(dev) & tc->mask; +} + +u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe) +{ + u64 hi, lo; + struct mlx4_ts_cqe *ts_cqe = (struct mlx4_ts_cqe *)cqe; + + lo = (u64)be16_to_cpu(ts_cqe->timestamp_lo); + hi = ((u64)be32_to_cpu(ts_cqe->timestamp_hi) + !lo) << 16; + + return hi | lo; +} + +void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, + struct skb_shared_hwtstamps *hwts, + u64 timestamp) +{ + u64 nsec; + + nsec = timecounter_cyc2time(&mdev->clock, timestamp); + + memset(hwts, 0, sizeof(struct skb_shared_hwtstamps)); + hwts->hwtstamp = ns_to_ktime(nsec); +} + +void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) +{ + struct mlx4_dev *dev = mdev->dev; + + memset(&mdev->cycles, 0, sizeof(mdev->cycles)); + mdev->cycles.read = mlx4_en_read_clock; + mdev->cycles.mask = CLOCKSOURCE_MASK(48); + /* Using shift to make calculation more accurate. 
Since current HW + * clock frequency is 427 MHz, and cycles are given using a 48 bits + * register, the biggest shift when calculating using u64, is 14 + * (max_cycles * multiplier < 2^64) + */ + mdev->cycles.shift = 14; + mdev->cycles.mult = + clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); + + timecounter_init(&mdev->clock, &mdev->cycles, + ktime_to_ns(ktime_get_real())); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index b8d0854a7ad..1e6c594d6d0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -77,6 +77,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, struct mlx4_en_dev *mdev = priv->mdev; int err = 0; char name[25]; + int timestamp_en = 0; struct cpu_rmap *rmap = #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap; @@ -123,8 +124,13 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (!cq->is_tx) cq->size = priv->rx_ring[cq->ring].actual_size; - err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, - cq->wqres.db.dma, &cq->mcq, cq->vector, 0); + if ((cq->is_tx && priv->hwtstamp_config.tx_type) || + (!cq->is_tx && priv->hwtstamp_config.rx_filter)) + timestamp_en = 1; + + err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, + &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, + cq->vector, 0, timestamp_en); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 00f25b5f297..bcf4d118e98 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1147,6 +1147,35 @@ out: return err; } +static int mlx4_en_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + + ret = ethtool_op_get_ts_info(dev, info); + if (ret) + return ret; + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + info->so_timestamping |= + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + } + + return ret; +} + const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, @@ -1173,6 +1202,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .set_rxfh_indir = mlx4_en_set_rxfh_indir, .get_channels = mlx4_en_get_channels, .set_channels = mlx4_en_set_channels, + .get_ts_info = mlx4_en_get_ts_info, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index fc27800e9c3..a5c9df07a7d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -300,6 +300,11 @@ static void *mlx4_en_add(struct mlx4_dev *dev) if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) mdev->pndev[i] = NULL; } + + /* Initialize time stamp mechanism */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_init_timestamp(mdev); + return mdev; err_mr: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e7e27842d8d..4cb9f320397 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ 
-1916,6 +1916,75 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + /* device doesn't support time stamping */ + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)) + return -EINVAL; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + if (mlx4_en_timestamp_config(dev, config.tx_type, config.rx_filter)) { + config.tx_type = HWTSTAMP_TX_OFF; + config.rx_filter = HWTSTAMP_FILTER_NONE; + } + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx4_en_hwtstamp_ioctl(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + static int mlx4_en_set_features(struct net_device *netdev, netdev_features_t features) { @@ -1943,6 +2012,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = mlx4_en_change_mtu, + .ndo_do_ioctl = mlx4_en_ioctl, .ndo_tx_timeout = mlx4_en_tx_timeout, .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, @@ -2054,6 +2124,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, spin_lock_init(&priv->filters_lock); #endif + /* Initialize time stamping config */ + priv->hwtstamp_config.flags = 0; + priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; + priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 10c24c784b7..91f2b2c43c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -42,6 +42,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int user_prio, struct mlx4_qp_context *context) { struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; memset(context, 0, sizeof *context); context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); @@ -65,6 +66,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = 
cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) + context->param3 |= cpu_to_be32(1 << 30); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4006f8857cb..02aee1ebd20 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -320,6 +320,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, } ring->buf = ring->wqres.buf.direct.buf; + ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter; + return 0; err_hwq: @@ -554,6 +556,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; @@ -565,6 +568,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud int polled = 0; int ip_summed; int factor = priv->cqe_factor; + u64 timestamp; if (!priv->port_up) return 0; @@ -669,8 +673,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud gro_skb->data_len = length; gro_skb->ip_summed = CHECKSUM_UNNECESSARY; - if (cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) { + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vid = be16_to_cpu(cqe->sl_vid); __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); @@ -680,8 +685,15 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid); skb_record_rx_queue(gro_skb, cq->ring); - napi_gro_frags(&cq->napi); + if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, + skb_hwtstamps(gro_skb), + timestamp); + } + + napi_gro_frags(&cq->napi); goto next; } @@ -714,10 +726,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (dev->features & NETIF_F_RXHASH) skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid); - if (be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_VLAN_PRESENT_MASK) + if ((be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); + if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + timestamp); + } + /* Push it up the stack */ netif_receive_skb(skb); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 49308cc65ee..b0a2d2b96e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -118,6 +118,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, } else ring->bf_enabled = true; + ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type; + return 0; err_map: @@ -192,8 +194,9 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner) + int index, u8 owner, u64 timestamp) { + struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index 
* TXBB_SIZE; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; @@ -204,6 +207,12 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, int i; __be32 *ptr = (__be32 *)tx_desc; __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); + struct skb_shared_hwtstamps hwts; + + if (timestamp) { + mlx4_en_fill_hwtstamps(mdev, &hwts, timestamp); + skb_tstamp_tx(skb, &hwts); + } /* Optimize the common case when there are no wraparounds */ if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { @@ -289,7 +298,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size)); + !!(ring->cons & ring->size), 0); ring->cons += ring->last_nr_txbb; cnt++; } @@ -318,6 +327,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; + u64 timestamp = 0; if (!priv->port_up) return; @@ -341,11 +351,14 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; + if (ring->tx_info[ring_index].ts_requested) + timestamp = mlx4_en_get_cqe_ts(cqe); + /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & - ring->size)); + ring->size), timestamp); packets++; bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); @@ -629,6 +642,16 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->skb = skb; tx_info->nr_txbb = nr_txbb; + /* + * For timestamping add flag to skb_shinfo and + * set flag for further reference + */ + if (ring->hwtstamp_tx_type == HWTSTAMP_TX_ON && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_info->ts_requested = 1; + } + /* Prepare ctrl segement apart opcode+ownership, which depends on * whether LSO is used */ tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e81840faa6c..0d32a82458b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1228,6 +1228,28 @@ static void unmap_bf_area(struct mlx4_dev *dev) io_mapping_free(mlx4_priv(dev)->bf_mapping); } +cycle_t mlx4_read_clock(struct mlx4_dev *dev) +{ + u32 clockhi, clocklo, clockhi1; + cycle_t cycles; + int i; + struct mlx4_priv *priv = mlx4_priv(dev); + + for (i = 0; i < 10; i++) { + clockhi = swab32(readl(priv->clock_mapping)); + clocklo = swab32(readl(priv->clock_mapping + 4)); + clockhi1 = swab32(readl(priv->clock_mapping)); + if (clockhi == clockhi1) + break; + } + + cycles = (u64) clockhi << 32 | (u64) clocklo; + + return cycles; +} +EXPORT_SYMBOL_GPL(mlx4_read_clock); + + static int map_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d4cb5d3b28a..85b0754ec8a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef CONFIG_MLX4_EN_DCB #include #endif @@ -207,6 +208,7 @@ struct mlx4_en_tx_info { u8 linear; u8 data_offset; u8 inl; + u8 
ts_requested; }; @@ -262,6 +264,7 @@ struct mlx4_en_tx_ring { struct mlx4_bf bf; bool bf_enabled; struct netdev_queue *tx_queue; + int hwtstamp_tx_type; }; struct mlx4_en_rx_desc { @@ -288,6 +291,7 @@ struct mlx4_en_rx_ring { unsigned long packets; unsigned long csum_ok; unsigned long csum_none; + int hwtstamp_rx_filter; }; struct mlx4_en_cq { @@ -348,6 +352,9 @@ struct mlx4_en_dev { u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; + struct cyclecounter cycles; + struct timecounter clock; + unsigned long last_overflow_check; }; @@ -525,6 +532,7 @@ struct mlx4_en_priv { struct device *ddev; int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; + struct hwtstamp_config hwtstamp_config; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; @@ -639,7 +647,18 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); u64 mlx4_en_mac_to_u64(u8 *addr); /* - * Globals + * Functions for time stamping + */ +u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); +void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, + struct skb_shared_hwtstamps *hwts, + u64 timestamp); +void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev); +int mlx4_en_timestamp_config(struct net_device *dev, + int tx_type, + int rx_filter); + +/* Globals */ extern const struct ethtool_ops mlx4_en_ethtool_ops; diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 6f65b2c8bb8..98fa492cf40 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -64,6 +64,22 @@ struct mlx4_err_cqe { u8 owner_sr_opcode; }; +struct mlx4_ts_cqe { + __be32 vlan_my_qpn; + __be32 immed_rss_invalid; + __be32 g_mlpath_rqpn; + __be32 timestamp_hi; + __be16 status; + u8 ipv6_ext_mask; + u8 badfcs_enc; + __be32 byte_cnt; + __be16 wqe_index; + __be16 checksum; + u8 reserved; + __be16 timestamp_lo; + u8 owner_sr_opcode; +} __packed; + enum { MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, MLX4_CQE_QPN_MASK = 0xffffff, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e088290d979..2fbc1464b53 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -40,6 +40,8 @@ #include +#include + #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 #define MSIX_LEGACY_SZ 4 @@ -840,7 +842,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - unsigned vector, int collapsed); + unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); @@ -1031,4 +1033,6 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); +cycle_t mlx4_read_clock(struct mlx4_dev *dev); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-70-g09d2 From 5a8eb24292ffd68604cedeb24ad2b4bc02cfc037 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 25 Apr 2013 04:42:29 +0000 Subject: pci: Add SRIOV helper function to determine if VFs are assigned to guest This patch adds a helper function that determines whether a PF has any VFs currently assigned to a guest. We have been implementing this check per driver, and going forward I would like to avoid that by making the function generic and using this helper.
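As a rough usage sketch (not part of this patch), a PF driver's SR-IOV teardown path might consult the new helper along the lines below; the function name example_pf_remove and the warning text are assumptions invented for the example.

#include <linux/pci.h>

/* Hypothetical caller: leave SR-IOV enabled while any VF is still assigned
 * to a guest. pci_vfs_assigned() returns the number of assigned VFs, or 0
 * when none are assigned (and 0 when the device is not a PF or SR-IOV is
 * compiled out).
 */
static void example_pf_remove(struct pci_dev *pdev)
{
	if (pci_vfs_assigned(pdev) > 0)
		dev_warn(&pdev->dev,
			 "VFs are still assigned to guests, skipping pci_disable_sriov()\n");
	else
		pci_disable_sriov(pdev);
}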
v2: Removed extern from the declaration of pci_vfs_assigned in pci.h and return 0 if SR-IOV is disabled, which is in line with other PCI SR-IOV functions. Signed-off-by: Alexander Duyck Acked-by: Bjorn Helgaas Signed-off-by: Jeff Kirsher --- drivers/pci/iov.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 +++++ 2 files changed, 46 insertions(+) (limited to 'include') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index ee599f274f0..c93071d428f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -728,6 +728,47 @@ int pci_num_vf(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_num_vf); +/** + * pci_vfs_assigned - returns number of VFs are assigned to a guest + * @dev: the PCI device + * + * Returns number of VFs belonging to this device that are assigned to a guest. + * If device is not a physical function returns -ENODEV. + */ +int pci_vfs_assigned(struct pci_dev *dev) +{ + struct pci_dev *vfdev; + unsigned int vfs_assigned = 0; + unsigned short dev_id; + + /* only search if we are a PF */ + if (!dev->is_physfn) + return 0; + + /* + * determine the device ID for the VFs, the vendor ID will be the + * same as the PF so there is no need to check for that one + */ + pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_VF_DID, &dev_id); + + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(dev->vendor, dev_id, NULL); + while (vfdev) { + /* + * It is considered assigned if it is a virtual function with + * our dev as the physical function and the assigned bit is set + */ + if (vfdev->is_virtfn && (vfdev->physfn == dev) && + (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) + vfs_assigned++; + + vfdev = pci_get_device(dev->vendor, dev_id, vfdev); + } + + return vfs_assigned; +} +EXPORT_SYMBOL_GPL(pci_vfs_assigned); + /** * pci_sriov_set_totalvfs -- reduce the TotalVFs available * @dev: the PCI PF device diff --git a/include/linux/pci.h b/include/linux/pci.h index 710067f3618..43e45ac3645 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1644,6 +1644,7 @@ extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); extern void pci_disable_sriov(struct pci_dev *dev); extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); extern int pci_num_vf(struct pci_dev *dev); +int pci_vfs_assigned(struct pci_dev *dev); extern int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); extern int pci_sriov_get_totalvfs(struct pci_dev *dev); #else @@ -1662,6 +1663,10 @@ static inline int pci_num_vf(struct pci_dev *dev) { return 0; } +static inline int pci_vfs_assigned(struct pci_dev *dev) +{ + return 0; +} static inline int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { return 0; -- cgit v1.2.3-70-g09d2 From 7276d5d743d775388bf382cd7bdea1a14e486d32 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Apr 2013 00:39:30 +0000 Subject: packet: minor: convert status bits into shifting format This makes it more readable and clearer what bits are still free to use. The compiler reduces this to a constant for us anyway. Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S.
Miller --- include/uapi/linux/if_packet.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 8136658ea47..4dfc234d80e 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -86,19 +86,19 @@ struct tpacket_auxdata { }; /* Rx ring - header status */ -#define TP_STATUS_KERNEL 0x0 -#define TP_STATUS_USER 0x1 -#define TP_STATUS_COPY 0x2 -#define TP_STATUS_LOSING 0x4 -#define TP_STATUS_CSUMNOTREADY 0x8 -#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ -#define TP_STATUS_BLK_TMO 0x20 +#define TP_STATUS_KERNEL 0 +#define TP_STATUS_USER (1 << 0) +#define TP_STATUS_COPY (1 << 1) +#define TP_STATUS_LOSING (1 << 2) +#define TP_STATUS_CSUMNOTREADY (1 << 3) +#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ +#define TP_STATUS_BLK_TMO (1 << 5) /* Tx ring - header status */ -#define TP_STATUS_AVAILABLE 0x0 -#define TP_STATUS_SEND_REQUEST 0x1 -#define TP_STATUS_SENDING 0x2 -#define TP_STATUS_WRONG_FORMAT 0x4 +#define TP_STATUS_AVAILABLE 0 +#define TP_STATUS_SEND_REQUEST (1 << 0) +#define TP_STATUS_SENDING (1 << 1) +#define TP_STATUS_WRONG_FORMAT (1 << 2) /* Rx ring - feature request bits */ #define TP_FT_REQ_FILL_RXHASH 0x1 -- cgit v1.2.3-70-g09d2 From b9c32fb2717094231b31a7d7dcf5fd7f3638ac2f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Apr 2013 00:39:31 +0000 Subject: packet: if hw/sw ts enabled in rx/tx ring, report which ts we got Currently, there is no way to find out which timestamp is reported in tpacket{,2,3}_hdr's tp_sec, tp_{n,u}sec members. It can be one of SOF_TIMESTAMPING_SYS_HARDWARE, SOF_TIMESTAMPING_RAW_HARDWARE, SOF_TIMESTAMPING_SOFTWARE, or a fallback variant late call from the PF_PACKET code in software. Therefore, report in the tp_status member of the ring buffer which timestamp has been reported for RX and TX path. This should not break anything for the following reasons: i) in RX ring path, the user needs to test for tp_status & TP_STATUS_USER, and later for other flags as well such as TP_STATUS_VLAN_VALID et al, so adding other flags will do no harm; ii) in TX ring path, time stamps with PACKET_TIMESTAMP socketoption are not available resp. had no effect except that the application setting this is buggy. Next to TP_STATUS_AVAILABLE, the user also should check for other flags such as TP_STATUS_WRONG_FORMAT to reclaim frames to the application. Thus, in case TX ts are turned off (default case), nothing happens to the application logic, and in case we want to use this new feature, we now can also check which of the ts source is reported in the status field as provided in the docs. Reported-by: Richard Cochran Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/uapi/linux/if_packet.h | 5 +++++ net/packet/af_packet.c | 36 +++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 4dfc234d80e..b950c02030c 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -100,6 +100,11 @@ struct tpacket_auxdata { #define TP_STATUS_SENDING (1 << 1) #define TP_STATUS_WRONG_FORMAT (1 << 2) +/* Rx and Tx ring - header status */ +#define TP_STATUS_TS_SOFTWARE (1 << 29) +#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) +#define TP_STATUS_TS_RAW_HARDWARE (1 << 31) + /* Rx ring - feature request bits */ #define TP_FT_REQ_FILL_RXHASH 0x1 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9d46a07a15c..ba8309a3e01 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -339,34 +339,35 @@ static int __packet_get_status(struct packet_sock *po, void *frame) } } -static bool tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, - unsigned int flags) +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, + unsigned int flags) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (shhwtstamps) { if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) - return true; + return TP_STATUS_TS_SYS_HARDWARE; if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) - return true; + return TP_STATUS_TS_RAW_HARDWARE; } if (ktime_to_timespec_cond(skb->tstamp, ts)) - return true; + return TP_STATUS_TS_SOFTWARE; - return false; + return 0; } -static void __packet_set_timestamp(struct packet_sock *po, void *frame, - struct sk_buff *skb) +static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, + struct sk_buff *skb) { union tpacket_uhdr h; struct timespec ts; + __u32 ts_status; - if (!tpacket_get_timestamp(skb, &ts, po->tp_tstamp)) - return; + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + return 0; h.raw = frame; switch (po->tp_version) { @@ -387,6 +388,8 @@ static void __packet_set_timestamp(struct packet_sock *po, void *frame, /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); + + return ts_status; } static void *packet_lookup_frame(struct packet_sock *po, @@ -1721,6 +1724,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timespec ts; + __u32 ts_status; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1803,9 +1807,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - if (!tpacket_get_timestamp(skb, &ts, po->tp_tstamp)) + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) getnstimeofday(&ts); + status |= ts_status; + switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; @@ -1905,11 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb) void *ph; if (likely(po->tx_ring.pg_vec)) { + __u32 ts; + ph = skb_shinfo(skb)->destructor_arg; BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); - __packet_set_timestamp(po, ph, skb); - __packet_set_status(po, ph, TP_STATUS_AVAILABLE); + + ts = __packet_set_timestamp(po, ph, skb); + __packet_set_status(po, ph, 
TP_STATUS_AVAILABLE | ts); } sock_wfree(skb); -- cgit v1.2.3-70-g09d2 From def3117493eafd9dfa1f809d861e0031b2cc8a07 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 23 Apr 2013 07:48:30 +0000 Subject: genl: Allow concurrent genl callbacks. All genl callbacks are serialized by genl-mutex. This can become a bottleneck in the multi-threaded case. The following patch adds a parameter to genl_family so that a particular family can get concurrent netlink callbacks without genl_lock held. A new rw-sem is used to protect genl callbacks from genl family unregister. In case of parallel_ops, the genl-family read-lock is taken for callbacks and the write lock is taken for registration or unregistration of any family. In case of a locked genl family, the semaphore and genl-mutex are locked for any operation. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 1 + net/netlink/genetlink.c | 114 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index bdfbe68c1c3..93024a47e0e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -50,6 +50,7 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + bool parallel_ops; int (*pre_doit)(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 5a55be3f17a..2f72598dd8f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ +static DECLARE_RWSEM(cb_lock); void genl_lock(void) { @@ -41,6 +43,18 @@ int lockdep_genl_is_held(void) EXPORT_SYMBOL(lockdep_genl_is_held); #endif +static void genl_lock_all(void) +{ + down_write(&cb_lock); + genl_lock(); +} + +static void genl_unlock_all(void) +{ + genl_unlock(); + up_write(&cb_lock); +} + #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) @@ -144,7 +158,7 @@ int genl_register_mc_group(struct genl_family *family, BUG_ON(grp->name[0] == '\0'); BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); - genl_lock(); + genl_lock_all(); /* special-case our own group */ if (grp == &notify_grp) @@ -213,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family, genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); out: - genl_unlock(); + genl_unlock_all(); return err; } EXPORT_SYMBOL(genl_register_mc_group); @@ -255,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family, void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp) { - genl_lock(); + genl_lock_all(); __genl_unregister_mc_group(family, grp); - genl_unlock(); + genl_unlock_all(); } EXPORT_SYMBOL(genl_unregister_mc_group); @@ -303,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) if (ops->policy) ops->flags |= GENL_CMD_CAP_HASPOL; - genl_lock(); + genl_lock_all(); list_add_tail(&ops->ops_list, &family->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWOPS, ops); err = 0; @@ -334,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) { struct genl_ops *rc; - genl_lock(); + genl_lock_all(); list_for_each_entry(rc, &family->ops_list, ops_list) { if (rc == ops) { list_del(&ops->ops_list); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_DELOPS, ops); return 0; } } -
genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -373,7 +387,7 @@ int genl_register_family(struct genl_family *family) INIT_LIST_HEAD(&family->ops_list); INIT_LIST_HEAD(&family->mcast_groups); - genl_lock(); + genl_lock_all(); if (genl_family_find_byname(family->name)) { err = -EEXIST; @@ -394,7 +408,7 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->maxattr) { + if (family->maxattr && !family->parallel_ops) { family->attrbuf = kmalloc((family->maxattr+1) * sizeof(struct nlattr *), GFP_KERNEL); if (family->attrbuf == NULL) { @@ -405,14 +419,14 @@ int genl_register_family(struct genl_family *family) family->attrbuf = NULL; list_add_tail(&family->family_list, genl_family_chain(family->id)); - genl_unlock(); + genl_unlock_all(); genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); return 0; errout_locked: - genl_unlock(); + genl_unlock_all(); errout: return err; } @@ -476,7 +490,7 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; - genl_lock(); + genl_lock_all(); genl_unregister_mc_groups(family); @@ -486,14 +500,14 @@ int genl_unregister_family(struct genl_family *family) list_del(&rc->family_list); INIT_LIST_HEAD(&family->ops_list); - genl_unlock(); + genl_unlock_all(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family); return 0; } - genl_unlock(); + genl_unlock_all(); return -ENOENT; } @@ -530,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_family_rcv_msg(struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh) { struct genl_ops *ops; - struct genl_family *family; struct net *net = sock_net(skb->sk); struct genl_info info; struct genlmsghdr *hdr = nlmsg_data(nlh); + struct nlattr **attrbuf; int hdrlen, err; - family = genl_family_find_byid(nlh->nlmsg_type); - if (family == NULL) - return -ENOENT; - /* this family doesn't exist in this netns */ if (!family->netnsok && !net_eq(net, &init_net)) return -ENOENT; @@ -560,26 +572,30 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + if (ops->dumpit == NULL) return -EOPNOTSUPP; - genl_unlock(); - { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; - err = netlink_dump_start(net->genl_sock, skb, nlh, &c); - } - genl_lock(); - return err; + return netlink_dump_start(net->genl_sock, skb, nlh, &c); } if (ops->doit == NULL) return -EOPNOTSUPP; - if (family->attrbuf) { - err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + if (family->maxattr && family->parallel_ops) { + attrbuf = kmalloc((family->maxattr+1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (attrbuf == NULL) + return -ENOMEM; + } else + attrbuf = family->attrbuf; + + if (attrbuf) { + err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, ops->policy); if (err < 0) return err; @@ -590,7 +606,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; - info.attrs = family->attrbuf; + info.attrs = attrbuf; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -605,14 +621,37 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (family->post_doit) family->post_doit(ops, skb, &info); 
+ if (family->parallel_ops) + kfree(attrbuf); + + return err; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_family *family; + int err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + if (!family->parallel_ops) + genl_lock(); + + err = genl_family_rcv_msg(family, skb, nlh); + + if (!family->parallel_ops) + genl_unlock(); + return err; } static void genl_rcv(struct sk_buff *skb) { - genl_lock(); + down_read(&cb_lock); netlink_rcv_skb(skb, &genl_rcv_msg); - genl_unlock(); + up_read(&cb_lock); } /************************************************************************** @@ -918,7 +957,6 @@ static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, - .cb_mutex = &genl_mutex, .flags = NL_CFG_F_NONROOT_RECV, }; -- cgit v1.2.3-70-g09d2 From 8f7ba3ca12f6f16526fa4a8aaf2cae91563eee69 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 25 Apr 2013 05:22:27 +0000 Subject: net/mlx4: Add set VF mac address support Add ndo_set_vf_mac support which allows to set the MAC address for mlx4 VF Ethernet NICs from the host. Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 31 +++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 42 +++++++++++++++++++++++++- include/linux/mlx4/cmd.h | 1 + 3 files changed, 73 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 0a301e1a063..a029124fe2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2016,3 +2016,34 @@ u32 mlx4_comm_get_version(void) { return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER; } + +static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) +{ + if ((vf < 0) || (vf >= dev->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + return -EINVAL; + } + + return vf+1; +} + +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + s_info->mac = mac; + mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", + vf, port, s_info->mac); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 05c7c13bdbd..8293a92bf15 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2024,6 +2024,19 @@ static int mlx4_en_set_features(struct net_device *netdev, } +static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + u64 mac_u64 = mlx4_en_mac_to_u64(mac); + + if (!is_valid_ether_addr(mac)) + return -EINVAL; + + return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); +} + + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2048,6 +2061,30 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif }; +static const struct net_device_ops 
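/*
 * A second ops table for the PF only: identical to mlx4_netdev_ops
 * except for the extra .ndo_set_vf_mac hook, and installed in
 * mlx4_en_init_netdev() only when mlx4_is_master() is true, so the
 * VF-management callbacks are never exposed on slave functions.
 */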
mlx4_netdev_ops_master = { + .ndo_open = mlx4_en_open, + .ndo_stop = mlx4_en_close, + .ndo_start_xmit = mlx4_en_xmit, + .ndo_select_queue = mlx4_en_select_queue, + .ndo_get_stats = mlx4_en_get_stats, + .ndo_set_rx_mode = mlx4_en_set_rx_mode, + .ndo_set_mac_address = mlx4_en_set_mac, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = mlx4_en_change_mtu, + .ndo_tx_timeout = mlx4_en_tx_timeout, + .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, + .ndo_set_vf_mac = mlx4_en_set_vf_mac, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx4_en_netpoll, +#endif + .ndo_set_features = mlx4_en_set_features, + .ndo_setup_tc = mlx4_en_setup_tc, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx4_en_filter_rfs, +#endif +}; + int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { @@ -2164,7 +2201,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, /* * Initialize netdev entry points */ - dev->netdev_ops = &mlx4_netdev_ops; + if (mlx4_is_master(priv->mdev->dev)) + dev->netdev_ops = &mlx4_netdev_ops_master; + else + dev->netdev_ops = &mlx4_netdev_ops; dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT; netif_set_real_num_tx_queues(dev, priv->tx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 26069518625..f21ddc6203b 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -232,6 +232,7 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); u32 mlx4_comm_get_version(void); +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) -- cgit v1.2.3-70-g09d2 From 3f7fb021d081c8aaac1d0cf69a288d21625e872e Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 25 Apr 2013 05:22:28 +0000 Subject: net/mlx4: Add set VF default vlan ID and priority support Add support to ndo_set_vf_vlan in the driver. Once this call is used the vport is considered to be in VST mode. In this mode, the PPF driver configures Ethernet QPs created by this VF to use this vlan id and priority. Currently RoCE isn't supported on that mode. The special values of VID=4095 or VID=0,UP=0 are considered as VGT. Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 72 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++ drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + drivers/net/ethernet/mellanox/mlx4/port.c | 4 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 36 +++++++++++ include/linux/mlx4/cmd.h | 2 + include/linux/mlx4/device.h | 3 +- 8 files changed, 126 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a029124fe2f..aad6f8dbfb4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1492,14 +1492,48 @@ out: static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) { - int port; + int port, err; + struct mlx4_vport_state *vp_admin; + struct mlx4_vport_oper_state *vp_oper; + for (port = 1; port <= MLX4_MAX_PORTS; port++) { - priv->mfunc.master.vf_oper[slave].vport[port].state = - priv->mfunc.master.vf_admin[slave].vport[port]; + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vp_oper->state = *vp_admin; + if (MLX4_VGT != vp_admin->default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, &(vp_oper->vlan_idx)); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn((&priv->dev), + "No vlan resorces slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg((&(priv->dev)), "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } } return 0; } +static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave) +{ + int port; + struct mlx4_vport_oper_state *vp_oper; + + for (port = 1; port <= MLX4_MAX_PORTS; port++) { + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + if (NO_INDX != vp_oper->vlan_idx) { + __mlx4_unregister_vlan(&priv->dev, + port, vp_oper->vlan_idx); + vp_oper->vlan_idx = NO_INDX; + } + } + return; +} + static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u16 param, u8 toggle) { @@ -1520,6 +1554,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; + mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; slave_state[slave].event_eq[i].token = 0; @@ -1566,7 +1601,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2) goto reset_slave; slave_state[slave].vhcr_dma |= param; - mlx4_master_activate_admin_state(priv, slave); + if (mlx4_master_activate_admin_state(priv, slave)) + goto reset_slave; slave_state[slave].active = true; mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; @@ -1776,6 +1812,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } INIT_LIST_HEAD(&s_state->mcast_filters[port]); priv->mfunc.master.vf_admin[i].vport[port].default_vlan = MLX4_VGT; + priv->mfunc.master.vf_oper[i].vport[port].state.default_vlan = MLX4_VGT; priv->mfunc.master.vf_oper[i].vport[port].vlan_idx = NO_INDX; priv->mfunc.master.vf_oper[i].vport[port].mac_idx = NO_INDX; } @@ -2047,3 +2084,30 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); + +int 
mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if ((!mlx4_is_master(dev)) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL)) + return -EPROTONOSUPPORT; + + if ((vlan > 4095) || (qos > 7)) + return -EINVAL; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + if ((0 == vlan) && (0 == qos)) + s_info->default_vlan = MLX4_VGT; + else + s_info->default_vlan = vlan; + s_info->default_qos = qos; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8293a92bf15..c1f2c5b34d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2036,6 +2036,14 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); } +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, @@ -2075,6 +2083,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, .ndo_set_vf_mac = mlx4_en_set_vf_mac, + .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, #endif diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 70d44adddc3..d2d30c94079 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -468,6 +468,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 +#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 @@ -655,6 +656,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + if (field32 & (1 << 26)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7e1d10059ed..eac3dae10ef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1157,6 +1157,8 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int 
pkey_tbl_sz); /* resource tracker functions*/ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index d3408add874..946e0af5fae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -310,7 +310,7 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx) } EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan); -static int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, +int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; @@ -384,7 +384,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) } EXPORT_SYMBOL_GPL(mlx4_register_vlan); -static void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index f2d64435d8e..5083d4babfe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -353,6 +353,39 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, } } +static int update_vport_qp_param(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *inbox, + u8 slave) +{ + struct mlx4_qp_context *qpc = inbox->buf + 8; + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + u32 qp_type; + int port; + + port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; + priv = mlx4_priv(dev); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + if (MLX4_QP_ST_RC == qp_type) + return -EINVAL; + + qpc->pri_path.vlan_index = vp_oper->vlan_idx; + qpc->pri_path.fl = (1 << 6) | (1 << 2); /* set cv bit and hide_cqe_vlan bit*/ + qpc->pri_path.feup |= 1 << 3; /* set fvl bit */ + qpc->pri_path.sched_queue &= 0xC7; + qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; + mlx4_dbg(dev, "qp %d port %d Q 0x%x set vlan to %d vidx %d feup %x fl %x\n", + be32_to_cpu(qpc->local_qpn) & 0xffffff, port, + (int)(qpc->pri_path.sched_queue), vp_oper->state.default_vlan, + vp_oper->vlan_idx, (int)(qpc->pri_path.feup), + (int)(qpc->pri_path.fl)); + } + return 0; +} + static int mpt_mask(struct mlx4_dev *dev) { return dev->caps.num_mpts - 1; @@ -2798,6 +2831,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); + err = update_vport_qp_param(dev, inbox, slave); + if (err) + return err; return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index f21ddc6203b..7daead75a6f 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -233,6 +233,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); + #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2fbc1464b53..6606d8f5862 100644 --- 
a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -155,7 +155,8 @@ enum { MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, - MLX4_DEV_CAP_FLAG2_TS = 1LL << 5 + MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, + MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6 }; enum { -- cgit v1.2.3-70-g09d2 From e6b6a2316379feebebacec0979b3ebc5743e7502 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 25 Apr 2013 05:22:29 +0000 Subject: net/mlx4: Add VF MAC spoof checking support Add ndo_set_vf_spoofchk support Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 40 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 2 ++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 8 +++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 3 +- 6 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index aad6f8dbfb4..eda347eeb5d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1514,6 +1514,21 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) (int)(vp_oper->state.default_vlan), vp_oper->vlan_idx, slave, port); } + if (vp_admin->spoofchk) { + vp_oper->mac_idx = __mlx4_register_mac(&priv->dev, + port, + vp_admin->mac); + if (0 > vp_oper->mac_idx) { + err = vp_oper->mac_idx; + vp_oper->mac_idx = NO_INDX; + mlx4_warn((&priv->dev), + "No mac resorces slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n", + vp_oper->state.mac, vp_oper->mac_idx, slave, port); + } } return 0; } @@ -1530,6 +1545,10 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave port, vp_oper->vlan_idx); vp_oper->vlan_idx = NO_INDX; } + if (NO_INDX != vp_oper->mac_idx) { + __mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx); + vp_oper->mac_idx = NO_INDX; + } } return; } @@ -2111,3 +2130,24 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); + +int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if ((!mlx4_is_master(dev)) || + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + s_info->spoofchk = setting; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c1f2c5b34d9..8d197b41242 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2044,6 +2044,14 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); } +static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_spoofchk(mdev->dev, en_priv->port, vf, setting); +} + static const struct 
net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, @@ -2084,6 +2092,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, .ndo_set_vf_mac = mlx4_en_set_vf_mac, .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, #endif diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d2d30c94079..b147bdd4076 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -659,6 +659,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; + if (field32 & (1 << 20)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 5083d4babfe..e12e0d2e0ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -383,6 +383,14 @@ static int update_vport_qp_param(struct mlx4_dev *dev, vp_oper->vlan_idx, (int)(qpc->pri_path.feup), (int)(qpc->pri_path.fl)); } + if (vp_oper->state.spoofchk) { + qpc->pri_path.feup |= 1 << 5; /* set fsm bit */; + qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; + mlx4_dbg(dev, "spoof qp %d port %d feup 0x%x, myLmc 0x%x mindx %d\n", + be32_to_cpu(qpc->local_qpn) & 0xffffff, port, + (int)qpc->pri_path.feup, (int)qpc->pri_path.grh_mylmc, + vp_oper->mac_idx); + } return 0; } diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7daead75a6f..95c3223719f 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -234,6 +234,7 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6606d8f5862..53acaf64189 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -156,7 +156,8 @@ enum { MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, - MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6 + MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, + MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7 }; enum { -- cgit v1.2.3-70-g09d2 From 2cccb9e4f3476da916146c2ec571c4f3eff738b1 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 25 Apr 2013 05:22:30 +0000 Subject: net/mlx4: Add support to get VF config Support getting VF config. Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 33 ++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 +++++++ include/linux/mlx4/cmd.h | 2 ++ 3 files changed, 43 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index eda347eeb5d..1df56cc50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2151,3 +2151,36 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); + +int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + ivf->vf = vf; + + /* need to convert it to a func */ + ivf->mac[0] = ((s_info->mac >> (5*8)) & 0xff); + ivf->mac[1] = ((s_info->mac >> (4*8)) & 0xff); + ivf->mac[2] = ((s_info->mac >> (3*8)) & 0xff); + ivf->mac[3] = ((s_info->mac >> (2*8)) & 0xff); + ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff); + ivf->mac[5] = ((s_info->mac) & 0xff); + + ivf->vlan = s_info->default_vlan; + ivf->qos = s_info->default_qos; + ivf->tx_rate = s_info->tx_rate; + ivf->spoofchk = s_info->spoofchk; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_config); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8d197b41242..a69a908614e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2052,6 +2052,13 @@ static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) return mlx4_set_vf_spoofchk(mdev->dev, en_priv->port, vf, setting); } +static int mlx4_en_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivf) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_config(mdev->dev, en_priv->port, vf, ivf); +} static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, @@ -2093,6 +2100,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_mac = mlx4_en_set_vf_mac, .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, + .ndo_get_vf_config = mlx4_en_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, #endif diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 95c3223719f..adf6e0648f2 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -34,6 +34,7 @@ #define MLX4_CMD_H #include +#include enum { /* initialization and general commands */ @@ -235,6 +236,7 @@ u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); +int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) -- cgit v1.2.3-70-g09d2 From 5d174dd80ce94b7ed0950e31fc9a0122c689523b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 27 Apr 2013 11:31:55 +0000 Subject: vxlan: source compatiablity with 
IFLA_VXLAN_GROUP (v2) Source compatiability for build iproute2 was broken by: commit c7995c43facc6e5dea4de63fa9d283a337aabeb1 Author: Atzm Watanabe vxlan: Allow setting destination to unicast address. Since this commit has not made it upstream (still net-next), and better to avoid gratitious changes to exported API's; go back to original definition, and add a comment. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 10 +++++----- include/uapi/linux/if_link.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e6ac16cd9ba..d8de8a1303e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1324,7 +1324,7 @@ static void vxlan_setup(struct net_device *dev) static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, - [IFLA_VXLAN_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, @@ -1406,8 +1406,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, } dst->remote_vni = vni; - if (data[IFLA_VXLAN_REMOTE]) - dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_REMOTE]); + if (data[IFLA_VXLAN_GROUP]) + dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_GROUP]); if (data[IFLA_VXLAN_LOCAL]) vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); @@ -1488,7 +1488,7 @@ static size_t vxlan_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ - nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_REMOTE */ + nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ @@ -1516,7 +1516,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) goto nla_put_failure; - if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_REMOTE, dst->remote_ip)) + if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_GROUP, dst->remote_ip)) goto nla_put_failure; if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex)) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e3163544f33..5346131524e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -297,7 +297,7 @@ enum macvlan_mode { enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, - IFLA_VXLAN_REMOTE, + IFLA_VXLAN_GROUP, /* group or remote address */ IFLA_VXLAN_LINK, IFLA_VXLAN_LOCAL, IFLA_VXLAN_TTL, -- cgit v1.2.3-70-g09d2 From 823aa873bc782f1c51b1ce8ec6da7cfcaf93836e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 27 Apr 2013 11:31:57 +0000 Subject: vxlan: allow choosing destination port per vxlan Allow configuring the default destination port on a per-device basis. Adds new netlink paramater IFLA_VXLAN_PORT to allow setting destination port when creating new vxlan. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 18 +++++++++++++----- include/uapi/linux/if_link.h | 3 ++- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 66fd7bef0a4..f1d9e981e54 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -110,6 +110,7 @@ struct vxlan_dev { struct net_device *dev; struct vxlan_rdst default_dst; /* default destination */ __be32 saddr; /* source address */ + __be16 dst_port; __u16 port_min; /* source port range */ __u16 port_max; __u8 tos; /* TOS override */ @@ -192,7 +193,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) goto nla_put_failure; - if (rdst->remote_port && rdst->remote_port != htons(vxlan_port) && + if (rdst->remote_port && rdst->remote_port != vxlan->dst_port && nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->default_dst.remote_vni && @@ -467,7 +468,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; port = nla_get_be16(tb[NDA_PORT]); } else - port = htons(vxlan_port); + port = vxlan->dst_port; if (tb[NDA_VNI]) { if (nla_len(tb[NDA_VNI]) != sizeof(u32)) @@ -579,7 +580,7 @@ static void vxlan_snoop(struct net_device *dev, err = vxlan_fdb_create(vxlan, src_mac, src_ip, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, - vxlan_port, + vxlan->dst_port, vxlan->default_dst.remote_vni, 0, NTF_SELF); spin_unlock(&vxlan->hash_lock); @@ -970,7 +971,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, __be16 df = 0; __u8 tos, ttl; - dst_port = rdst->remote_port ? rdst->remote_port : htons(vxlan_port); + dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; vni = rdst->remote_vni; dst = rdst->remote_ip; @@ -1314,6 +1315,7 @@ static void vxlan_setup(struct net_device *dev) inet_get_local_port_range(&low, &high); vxlan->port_min = low; vxlan->port_max = high; + vxlan->dst_port = htons(vxlan_port); vxlan->dev = dev; @@ -1336,6 +1338,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, + [IFLA_VXLAN_PORT] = { .type = NLA_U16 }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1465,6 +1468,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vxlan->port_max = ntohs(p->high); } + if (data[IFLA_VXLAN_PORT]) + vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); + SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); err = register_netdevice(dev); @@ -1500,6 +1506,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + + nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */ 0; } @@ -1536,7 +1543,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_L3MISS, !!(vxlan->flags & VXLAN_F_L3MISS)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || - nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) + nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) || + nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port)) goto nla_put_failure; if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 
5346131524e..b05823cae78 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -305,11 +305,12 @@ enum { IFLA_VXLAN_LEARNING, IFLA_VXLAN_AGEING, IFLA_VXLAN_LIMIT, - IFLA_VXLAN_PORT_RANGE, + IFLA_VXLAN_PORT_RANGE, /* source port */ IFLA_VXLAN_PROXY, IFLA_VXLAN_RSC, IFLA_VXLAN_L2MISS, IFLA_VXLAN_L3MISS, + IFLA_VXLAN_PORT, /* destination port */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3-70-g09d2 From 626419038a3e4a1f61119a4af08d01415961eb4e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Apr 2013 06:53:52 +0000 Subject: packet_diag: disclose uid value This value is disclosed via /proc/net/packet but not via netlink messages. The goal is to have the same level of information. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/packet_diag.h | 1 + net/packet/diag.c | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index afafd703ad9..84f83a47b6f 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -32,6 +32,7 @@ enum { PACKET_DIAG_RX_RING, PACKET_DIAG_TX_RING, PACKET_DIAG_FANOUT, + PACKET_DIAG_UID, __PACKET_DIAG_MAX, }; diff --git a/net/packet/diag.c b/net/packet/diag.c index d3fcd1ebef7..04c8219a2d0 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) return ret; } -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct packet_diag_req *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; @@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag pdiag_put_info(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_INFO) && + nla_put_u32(skb, PACKET_DIAG_UID, + from_kuid_munged(user_ns, sock_i_uid(sk)))) + goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && pdiag_put_mclist(po, skb)) goto out_nlmsg_trim; @@ -183,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - sock_i_ino(sk)) < 0) + if (sk_diag_fill(sk, skb, req, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) goto done; next: num++; -- cgit v1.2.3-70-g09d2 From 76d0eeb1a1579453cfd7c4da22004d4b34187ab4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Apr 2013 06:53:53 +0000 Subject: packet_diag: disclose meminfo values sk_rmem_alloc is disclosed via /proc/net/packet but not via netlink messages. The goal is to have the same level of information. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/packet_diag.h | 2 ++ net/packet/diag.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index 84f83a47b6f..c0802c18c8a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -16,6 +16,7 @@ struct packet_diag_req { #define PACKET_SHOW_MCLIST 0x00000002 /* A set of packet_diag_mclist-s */ #define PACKET_SHOW_RING_CFG 0x00000004 /* Rings configuration parameters */ #define PACKET_SHOW_FANOUT 0x00000008 +#define PACKET_SHOW_MEMINFO 0x00000010 struct packet_diag_msg { __u8 pdiag_family; @@ -33,6 +34,7 @@ enum { PACKET_DIAG_TX_RING, PACKET_DIAG_FANOUT, PACKET_DIAG_UID, + PACKET_DIAG_MEMINFO, __PACKET_DIAG_MAX, }; diff --git a/net/packet/diag.c b/net/packet/diag.c index 04c8219a2d0..822fe9b33a4 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -166,6 +166,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, pdiag_put_fanout(po, skb)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.3-70-g09d2 From e8d9612c181b1a68ba5f71384629343466f1bd13 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Apr 2013 06:53:54 +0000 Subject: sock_diag: allow to dump bpf filters This patch allows to dump BPF filters attached to a socket with SO_ATTACH_FILTER. Note that we check CAP_SYS_ADMIN before allowing to dump this info. For now, only AF_PACKET sockets use this feature. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 3 +++ include/uapi/linux/packet_diag.h | 2 ++ net/core/sock_diag.c | 33 +++++++++++++++++++++++++++++++++ net/packet/diag.c | 4 ++++ 4 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index e8d702e0fd8..54f91d35e5f 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,6 +1,7 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include #include struct sk_buff; @@ -22,5 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); +int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, + struct sk_buff *skb, int attrtype); #endif diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index c0802c18c8a..b2cc0cd9c4d 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -17,6 +17,7 @@ struct packet_diag_req { #define PACKET_SHOW_RING_CFG 0x00000004 /* Rings configuration parameters */ #define PACKET_SHOW_FANOUT 0x00000008 #define PACKET_SHOW_MEMINFO 0x00000010 +#define PACKET_SHOW_FILTER 0x00000020 struct packet_diag_msg { __u8 pdiag_family; @@ -35,6 +36,7 @@ enum { PACKET_DIAG_FANOUT, PACKET_DIAG_UID, PACKET_DIAG_MEMINFO, + PACKET_DIAG_FILTER, __PACKET_DIAG_MAX, }; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a29e90cf36b..d5bef0b0f63 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,6 +49,39 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); +int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, + struct sk_buff *skb, int attrtype) +{ + struct nlattr *attr; + struct sk_filter 
*filter; + unsigned int len; + int err = 0; + + if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + nla_reserve(skb, attrtype, 0); + return 0; + } + + rcu_read_lock(); + + filter = rcu_dereference(sk->sk_filter); + len = filter ? filter->len * sizeof(struct sock_filter) : 0; + + attr = nla_reserve(skb, attrtype, len); + if (attr == NULL) { + err = -EMSGSIZE; + goto out; + } + + if (filter) + memcpy(nla_data(attr), filter->insns, len); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(sock_diag_put_filterinfo); + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); diff --git a/net/packet/diag.c b/net/packet/diag.c index 822fe9b33a4..a9584a2f6d6 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -170,6 +170,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->pdiag_show & PACKET_SHOW_FILTER) && + sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.3-70-g09d2 From a4c4009f4f54dabaaea1bb2b2c3c8930e93cd409 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Apr 2013 09:52:25 +0000 Subject: net: increase frag hash size Increase fragmentation hash bucket size to 1024 from old 64 elems. After we increased the frag mem limits commit c2a93660 (net: increase fragment memory usage limits) the hash size of 64 elements is simply too small. Also considering the mem limit is per netns and the hash table is shared for all netns. For the embedded people, note that this increase will change the hash table/array from using approx 1 Kbytes to 16 Kbytes. Signed-off-by: Jesper Dangaard Brouer Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6f41b45e819..4182c9be8bb 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -41,7 +41,7 @@ struct inet_frag_queue { struct netns_frags *net; }; -#define INETFRAGS_HASHSZ 64 +#define INETFRAGS_HASHSZ 1024 /* averaged: * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / -- cgit v1.2.3-70-g09d2 From 5f5624cf156283687e11ea329c7a0523c677ea0e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 25 Apr 2013 11:08:30 +0000 Subject: ipv6: Kill ipv6 dependency of icmpv6_send(). Following patch adds icmp-registration module for ipv6. It allows ipv6 protocol to register icmp_sender which is used for sending ipv6 icmp msgs. This extra layer allows us to kill ipv6 dependency for sending icmp packets. This patch also fixes ip_tunnel compilation problem when ip_tunnel is statically compiled in kernel but ipv6 is module Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 18 +++++++++++++++--- net/ipv6/Makefile | 2 +- net/ipv6/icmp.c | 35 ++++++++++++++++++++--------------- net/ipv6/ip6_icmp.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 19 deletions(-) create mode 100644 net/ipv6/ip6_icmp.c (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index b4f6c29cace..630f45335c7 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -11,9 +11,21 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #include -extern void icmpv6_send(struct sk_buff *skb, - u8 type, u8 code, - __u32 info); +#if IS_ENABLED(CONFIG_IPV6) +extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); + +typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info); +extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); +extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); + +#else + +static inline void icmpv6_send(struct sk_buff *skb, + u8 type, u8 code, __u32 info) +{ + +} +#endif extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 309af19a0a0..9af088d2cda 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 71b900c3f4f..2a53a790514 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -123,15 +123,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) spin_unlock_bh(&sk->sk_lock.slock); } -/* - * Slightly more convenient version of icmpv6_send. - */ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) -{ - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); - kfree_skb(skb); -} - /* * Figure out, may we reply to this packet with icmp error. * @@ -332,7 +323,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -381,7 +372,7 @@ relookup_failed: /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; @@ -406,7 +397,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) - * Rule (e.1) is enforced by not using icmpv6_send + * Rule (e.1) is enforced by not using icmp6_send * in any code that processes icmp errors. */ addr_type = ipv6_addr_type(&hdr->daddr); @@ -444,7 +435,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. 
*/ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); return; } @@ -452,7 +443,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); + LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); return; } @@ -529,7 +520,14 @@ out_dst_release: out: icmpv6_xmit_unlock(sk); } -EXPORT_SYMBOL(icmpv6_send); + +/* Slightly more convenient version of icmp6_send. + */ +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos); + kfree_skb(skb); +} static void icmpv6_echo_reply(struct sk_buff *skb) { @@ -885,8 +883,14 @@ int __init icmpv6_init(void) err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; + + err = inet6_register_icmp_sender(icmp6_send); + if (err) + goto sender_reg_err; return 0; +sender_reg_err: + inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); unregister_pernet_subsys(&icmpv6_sk_ops); @@ -895,6 +899,7 @@ fail: void icmpv6_cleanup(void) { + inet6_unregister_icmp_sender(icmp6_send); unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c new file mode 100644 index 00000000000..4578e23834f --- /dev/null +++ b/net/ipv6/ip6_icmp.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +#include + +#if IS_ENABLED(CONFIG_IPV6) + +static ip6_icmp_send_t __rcu *ip6_icmp_send; + +int inet6_register_icmp_sender(ip6_icmp_send_t *fn) +{ + return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL(inet6_register_icmp_sender); + +int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) +{ + int ret; + + ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? 
+ 0 : -EINVAL; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(inet6_unregister_icmp_sender); + +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ + ip6_icmp_send_t *send; + + rcu_read_lock(); + send = rcu_dereference(ip6_icmp_send); + + if (!send) + goto out; + send(skb, type, code, info); +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(icmpv6_send); +#endif -- cgit v1.2.3-70-g09d2 From 43c56e595bb81319230affd545392536c933317e Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 8 Apr 2013 21:51:25 +0200 Subject: netfilter: ipset: Make possible to test elements marked with nomatch Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 8 ++++++++ net/netfilter/ipset/ip_set_hash_ipportnet.c | 14 ++++++++------ net/netfilter/ipset/ip_set_hash_net.c | 14 ++++++++------ net/netfilter/ipset/ip_set_hash_netiface.c | 14 ++++++++------ net/netfilter/ipset/ip_set_hash_netport.c | 14 ++++++++------ 5 files changed, 40 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7958e84a65a..970187187f5 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -200,6 +200,14 @@ ip_set_eexist(int ret, u32 flags) return ret == -IPSET_ERR_EXIST && (flags & IPSET_FLAG_EXIST); } +/* Match elements marked with nomatch */ +static inline bool +ip_set_enomatch(int ret, u32 flags, enum ipset_adt adt) +{ + return adt == IPSET_TEST && + ret == -ENOTEMPTY && ((flags >> 16) & IPSET_FLAG_NOMATCH); +} + /* Check the NLA_F_NET_BYTEORDER flag */ static inline bool ip_set_attr_netorder(struct nlattr *tb[], int type) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 10a30b4fc7d..b4836c81fbb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -279,10 +279,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; @@ -292,7 +292,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], data.ip = htonl(ip); data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr + 1)); ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip; @@ -610,15 +611,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } port = ntohs(data.port); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d6a59154d71..6dbe0afc5a8 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -225,16 +225,17 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { data.ip = htonl(ip & ip_set_hostmask(data.cidr)); ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip; @@ -466,15 +467,16 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } /* Create hash:ip type of sets */ diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f2b0a3c3013..248162020d8 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -396,13 +396,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) data.physdev = 1; - if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) - flags |= (cadt_flags << 16); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { data.ip = htonl(ip & ip_set_hostmask(data.cidr)); ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } if (tb[IPSET_ATTR_IP_TO]) { @@ -704,13 +705,14 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) data.physdev = 1; - if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) - flags |= (cadt_flags << 16); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); } ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 
0 : ret; } /* Create hash:ip type of sets */ diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 349deb672a2..57b0550a0b0 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -272,16 +272,17 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { data.ip = htonl(ip & ip_set_hostmask(data.cidr + 1)); ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } port = port_to = ntohs(data.port); @@ -561,15 +562,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { + if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) - flags |= (cadt_flags << 16); + flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout, flags); - return ip_set_eexist(ret, flags) ? 0 : ret; + return ip_set_enomatch(ret, flags, adt) ? 1 : + ip_set_eexist(ret, flags) ? 0 : ret; } port = ntohs(data.port); -- cgit v1.2.3-70-g09d2 From 8672d4d1a00b59057bb1f9659259967d2a19e086 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 8 Apr 2013 20:54:37 +0200 Subject: netfilter: ipset: Move often used IPv6 address masking function to header file Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/pfxlen.h | 9 +++++++++ net/netfilter/ipset/ip_set_hash_ip.c | 9 --------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 9 --------- net/netfilter/ipset/ip_set_hash_net.c | 9 --------- net/netfilter/ipset/ip_set_hash_netiface.c | 9 --------- net/netfilter/ipset/ip_set_hash_netport.c | 9 --------- 6 files changed, 9 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/pfxlen.h b/include/linux/netfilter/ipset/pfxlen.h index 199fd11fedc..1afbb94b4b6 100644 --- a/include/linux/netfilter/ipset/pfxlen.h +++ b/include/linux/netfilter/ipset/pfxlen.h @@ -41,4 +41,13 @@ do { \ to = from | ~ip_set_hostmask(cidr); \ } while (0) +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + #endif /*_PFXLEN_H */ diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index b7d4cb475ae..c486ad2d43b 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -255,15 +255,6 @@ hash_ip6_data_zero_out(struct hash_ip6_elem *elem) ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); } -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; -} - static bool 
hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index b4836c81fbb..352f8b4a63e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -445,15 +445,6 @@ hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) elem->proto = 0; } -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; -} - static inline void hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 6dbe0afc5a8..370947c65b2 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -342,15 +342,6 @@ hash_net6_data_zero_out(struct hash_net6_elem *elem) elem->cidr = 0; } -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; -} - static inline void hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr) { diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 248162020d8..dd9843e3b06 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -528,15 +528,6 @@ hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) elem->elem = 0; } -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; -} - static inline void hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) { diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 57b0550a0b0..cd1d3c1df72 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -406,15 +406,6 @@ hash_netport6_data_zero_out(struct hash_netport6_elem *elem) elem->proto = 0; } -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ - ip->ip6[0] &= ip_set_netmask6(prefix)[0]; - ip->ip6[1] &= ip_set_netmask6(prefix)[1]; - ip->ip6[2] &= ip_set_netmask6(prefix)[2]; - ip->ip6[3] &= ip_set_netmask6(prefix)[3]; -} - static inline void hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr) { -- cgit v1.2.3-70-g09d2 From 075e64c041b5d3c29651965608e1e76505e01d54 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Apr 2013 14:28:55 +0200 Subject: netfilter: ipset: Introduce extensions to elements in the core Introduce extensions to elements in the core and prepare timeout as the first one. This patch also modifies the em_ipset classifier to use the new extension struct layout. 
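[Editorial note, not part of the patch: the sketch below is a minimal userspace model of the offset-based element-extension layout this series introduces. It mirrors the idea behind the ext_timeout()/get_ext() helpers and the IPSET_OFFSET_TIMEOUT offset added later in the series: each stored element is followed by its extension data, and an extension is reached by adding a per-set byte offset to the element pointer. All names in the sketch (demo_*) are hypothetical and it is only an illustration under those assumptions, not the kernel API.]

/* Minimal userspace sketch of offset-based element extensions.
 * Hypothetical demo_* names; illustrates the layout idea only. */
#include <stdio.h>
#include <stddef.h>

struct demo_elem {                      /* type-specific element data */
	unsigned int ip;
	unsigned int cidr;              /* also keeps the timeout 8-byte aligned */
};

struct demo_set {
	size_t dsize;                   /* full element size incl. extensions */
	size_t offset_timeout;          /* byte offset of the timeout extension */
	unsigned char storage[64];      /* flat per-element storage */
};

/* Like ext_timeout(): the extension sits at a byte offset behind the element. */
static unsigned long *demo_ext_timeout(void *elem, const struct demo_set *set)
{
	return (unsigned long *)((unsigned char *)elem + set->offset_timeout);
}

/* Like get_ext(): element i starts at i * dsize in the flat storage. */
static void *demo_elem_at(struct demo_set *set, unsigned int i)
{
	return set->storage + set->dsize * i;
}

int main(void)
{
	struct demo_set set = {
		.dsize = sizeof(struct demo_elem) + sizeof(unsigned long),
		.offset_timeout = sizeof(struct demo_elem),
	};
	struct demo_elem *e = demo_elem_at(&set, 2);

	e->ip = 0x0a000001u;                    /* fill the element itself */
	*demo_ext_timeout(e, &set) = 3600;      /* then its timeout extension */
	printf("elem 2: ip=%#x timeout=%lu\n", e->ip, *demo_ext_timeout(e, &set));
	return 0;
}

[The point of the offset scheme, as the later generic bitmap/hash headers in this series show, is that the same generic add/del/test code can handle element types with or without a given extension: the per-set dsize and offset table decide at runtime where, if anywhere, the extension lives.]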
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 46 +++++++++-- include/linux/netfilter/ipset/ip_set_timeout.h | 102 ++++++------------------- net/netfilter/ipset/ip_set_core.c | 24 ++++-- net/netfilter/xt_set.c | 24 ++---- net/sched/em_ipset.c | 2 +- 5 files changed, 87 insertions(+), 111 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 970187187f5..bf0220cbf46 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2011 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -47,10 +47,30 @@ enum ip_set_feature { IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), }; +/* Set extensions */ +enum ip_set_extension { + IPSET_EXT_NONE = 0, + IPSET_EXT_BIT_TIMEOUT = 1, + IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), +}; + +/* Extension offsets */ +enum ip_set_offset { + IPSET_OFFSET_TIMEOUT = 0, + IPSET_OFFSET_MAX, +}; + +#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) + +struct ip_set_ext { + unsigned long timeout; +}; + struct ip_set; typedef int (*ipset_adtfn)(struct ip_set *set, void *value, - u32 timeout, u32 flags); + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); /* Kernel API function options */ struct ip_set_adt_opt { @@ -58,7 +78,7 @@ struct ip_set_adt_opt { u8 dim; /* Dimension of match/target */ u8 flags; /* Direction and negation flags */ u32 cmdflags; /* Command-like flags */ - u32 timeout; /* Timeout value */ + struct ip_set_ext ext; /* Extensions */ }; /* Set type, variant-specific part */ @@ -69,7 +89,7 @@ struct ip_set_type_variant { * positive for matching element */ int (*kadt)(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt); + enum ipset_adt adt, struct ip_set_adt_opt *opt); /* Userspace: test/add/del entries * returns negative error code, @@ -151,6 +171,8 @@ struct ip_set { u8 family; /* The type revision */ u8 revision; + /* Extensions */ + u8 extensions; /* The type specific data */ void *data; }; @@ -167,19 +189,21 @@ extern void ip_set_nfnl_put(ip_set_id_t index); extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb, const struct xt_action_param *par, - const struct ip_set_adt_opt *opt); + struct ip_set_adt_opt *opt); extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb, const struct xt_action_param *par, - const struct ip_set_adt_opt *opt); + struct ip_set_adt_opt *opt); extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb, const struct xt_action_param *par, - const struct ip_set_adt_opt *opt); + struct ip_set_adt_opt *opt); /* Utility functions */ extern void *ip_set_alloc(size_t size); extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); +extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], + struct ip_set_ext *ext); static inline int ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) @@ -292,4 +316,12 @@ bitmap_bytes(u32 a, u32 b) return 4 * ((((b - a + 8) / 8) + 3) / 4); } +#include + +#define 
IP_SET_INIT_KEXT(skb, opt, map) \ + { .timeout = ip_set_adt_opt_timeout(opt, map) } + +#define IP_SET_INIT_UEXT(map) \ + { .timeout = (map)->timeout } + #endif /*_IP_SET_H */ diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 41d9cfa0816..3aac04167ca 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -1,7 +1,7 @@ #ifndef _IP_SET_TIMEOUT_H #define _IP_SET_TIMEOUT_H -/* Copyright (C) 2003-2011 Jozsef Kadlecsik +/* Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,13 +17,14 @@ #define IPSET_GC_PERIOD(timeout) \ ((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1) -/* Set is defined without timeout support: timeout value may be 0 */ -#define IPSET_NO_TIMEOUT UINT_MAX +/* Entry is set with no timeout value */ +#define IPSET_ELEM_PERMANENT 0 -#define with_timeout(timeout) ((timeout) != IPSET_NO_TIMEOUT) +/* Set is defined with timeout support: timeout value may be 0 */ +#define IPSET_NO_TIMEOUT UINT_MAX -#define opt_timeout(opt, map) \ - (with_timeout((opt)->timeout) ? (opt)->timeout : (map)->timeout) +#define ip_set_adt_opt_timeout(opt, map) \ +((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (map)->timeout) static inline unsigned int ip_set_timeout_uget(struct nlattr *tb) @@ -38,61 +39,6 @@ ip_set_timeout_uget(struct nlattr *tb) return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout; } -#ifdef IP_SET_BITMAP_TIMEOUT - -/* Bitmap specific timeout constants and macros for the entries */ - -/* Bitmap entry is unset */ -#define IPSET_ELEM_UNSET 0 -/* Bitmap entry is set with no timeout value */ -#define IPSET_ELEM_PERMANENT (UINT_MAX/2) - -static inline bool -ip_set_timeout_test(unsigned long timeout) -{ - return timeout != IPSET_ELEM_UNSET && - (timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout)); -} - -static inline bool -ip_set_timeout_expired(unsigned long timeout) -{ - return timeout != IPSET_ELEM_UNSET && - timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(timeout); -} - -static inline unsigned long -ip_set_timeout_set(u32 timeout) -{ - unsigned long t; - - if (!timeout) - return IPSET_ELEM_PERMANENT; - - t = msecs_to_jiffies(timeout * 1000) + jiffies; - if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT) - /* Bingo! */ - t++; - - return t; -} - -static inline u32 -ip_set_timeout_get(unsigned long timeout) -{ - return timeout == IPSET_ELEM_PERMANENT ? 
0 : - jiffies_to_msecs(timeout - jiffies)/1000; -} - -#else - -/* Hash specific timeout constants and macros for the entries */ - -/* Hash entry is set with no timeout value */ -#define IPSET_ELEM_PERMANENT 0 - static inline bool ip_set_timeout_test(unsigned long timeout) { @@ -101,36 +47,32 @@ ip_set_timeout_test(unsigned long timeout) } static inline bool -ip_set_timeout_expired(unsigned long timeout) +ip_set_timeout_expired(unsigned long *timeout) { - return timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(timeout); + return *timeout != IPSET_ELEM_PERMANENT && + time_is_before_jiffies(*timeout); } -static inline unsigned long -ip_set_timeout_set(u32 timeout) +static inline void +ip_set_timeout_set(unsigned long *timeout, u32 t) { - unsigned long t; - - if (!timeout) - return IPSET_ELEM_PERMANENT; + if (!t) { + *timeout = IPSET_ELEM_PERMANENT; + return; + } - t = msecs_to_jiffies(timeout * 1000) + jiffies; - if (t == IPSET_ELEM_PERMANENT) + *timeout = msecs_to_jiffies(t * 1000) + jiffies; + if (*timeout == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - t++; - - return t; + (*timeout)--; } static inline u32 -ip_set_timeout_get(unsigned long timeout) +ip_set_timeout_get(unsigned long *timeout) { - return timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(timeout - jiffies)/1000; + return *timeout == IPSET_ELEM_PERMANENT ? 0 : + jiffies_to_msecs(*timeout - jiffies)/1000; } -#endif /* ! IP_SET_BITMAP_TIMEOUT */ #endif /* __KERNEL__ */ - #endif /* _IP_SET_TIMEOUT_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 86f5e26f39d..4486285d10d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1,6 +1,6 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf - * Copyright (C) 2003-2011 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,6 +315,19 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +int +ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], + struct ip_set_ext *ext) +{ + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!(set->extensions & IPSET_EXT_TIMEOUT)) + return -IPSET_ERR_TIMEOUT; + ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_extensions); + /* * Creating/destroying/renaming/swapping affect the existence and * the properties of a set. 
All of these can be executed from userspace @@ -365,8 +378,7 @@ ip_set_rcu_get(ip_set_id_t index) int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret = 0; @@ -404,8 +416,7 @@ EXPORT_SYMBOL_GPL(ip_set_test); int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret; @@ -427,8 +438,7 @@ EXPORT_SYMBOL_GPL(ip_set_add); int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, - const struct xt_action_param *par, - const struct ip_set_adt_opt *opt) + const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(index); int ret = 0; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 865a9e54f3a..636c5199ff3 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -1,7 +1,7 @@ /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson - * Copyright (C) 2003-2011 Jozsef Kadlecsik + * Copyright (C) 2003-2013 Jozsef Kadlecsik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SET"); static inline int match_set(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, - const struct ip_set_adt_opt *opt, int inv) + struct ip_set_adt_opt *opt, int inv) { if (ip_set_test(index, skb, par, opt)) inv = !inv; @@ -38,20 +38,12 @@ match_set(ip_set_id_t index, const struct sk_buff *skb, } #define ADT_OPT(n, f, d, fs, cfs, t) \ -const struct ip_set_adt_opt n = { \ - .family = f, \ - .dim = d, \ - .flags = fs, \ - .cmdflags = cfs, \ - .timeout = t, \ -} -#define ADT_MOPT(n, f, d, fs, cfs, t) \ struct ip_set_adt_opt n = { \ .family = f, \ .dim = d, \ .flags = fs, \ .cmdflags = cfs, \ - .timeout = t, \ + .ext.timeout = t, \ } /* Revision 0 interface: backward compatible with netfilter/iptables */ @@ -305,15 +297,15 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_MOPT(add_opt, par->family, info->add_set.dim, - info->add_set.flags, info->flags, info->timeout); + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ - if (add_opt.timeout != IPSET_NO_TIMEOUT && - add_opt.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.timeout = UINT_MAX/MSEC_PER_SEC; + if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && + add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 3130320997e..938b7cbf562 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -83,7 +83,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; - opt.timeout = ~0u; + opt.ext.timeout = ~0u; network_offset = skb_network_offset(skb); skb_pull(skb, 
network_offset); -- cgit v1.2.3-70-g09d2 From 4d73de38c256623b324474098f7d2bb4e97f7cf0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 8 Apr 2013 21:00:52 +0200 Subject: netfilter: ipset: Unified bitmap type generation Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_bitmap.h | 6 + net/netfilter/ipset/ip_set_bitmap_gen.h | 265 ++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_bitmap_gen.h (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 1a30646d5be..5e4662a71e0 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -5,6 +5,12 @@ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF +enum { + IPSET_ADD_FAILED = 1, + IPSET_ADD_STORE_PLAIN_TIMEOUT, + IPSET_ADD_START_STORED_TIMEOUT, +}; + /* Common functions */ static inline u32 diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h new file mode 100644 index 00000000000..b9931591cbe --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -0,0 +1,265 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __IP_SET_BITMAP_IP_GEN_H +#define __IP_SET_BITMAP_IP_GEN_H + +#define CONCAT(a, b) a##b +#define TOKEN(a,b) CONCAT(a, b) + +#define mtype_do_test TOKEN(MTYPE, _do_test) +#define mtype_gc_test TOKEN(MTYPE, _gc_test) +#define mtype_is_filled TOKEN(MTYPE, _is_filled) +#define mtype_do_add TOKEN(MTYPE, _do_add) +#define mtype_do_del TOKEN(MTYPE, _do_del) +#define mtype_do_list TOKEN(MTYPE, _do_list) +#define mtype_do_head TOKEN(MTYPE, _do_head) +#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init TOKEN(MTYPE, _gc_init) +#define mtype_kadt TOKEN(MTYPE, _kadt) +#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_destroy TOKEN(MTYPE, _destroy) +#define mtype_flush TOKEN(MTYPE, _flush) +#define mtype_head TOKEN(MTYPE, _head) +#define mtype_same_set TOKEN(MTYPE, _same_set) +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_test TOKEN(MTYPE, _test) +#define mtype_add TOKEN(MTYPE, _add) +#define mtype_del TOKEN(MTYPE, _del) +#define mtype_list TOKEN(MTYPE, _list) +#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype MTYPE + +#define ext_timeout(e, m) \ + (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ + struct mtype *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +mtype_destroy(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + if (map->dsize) + ip_set_free(map->extensions); + kfree(map); + + set->data = NULL; +} + +static void +mtype_flush(struct ip_set *set) +{ + struct mtype *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct mtype *map = set->data; + struct nlattr 
*nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + if (mtype_do_head(skb, map) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + + map->memsize + + map->dsize * map->elements)) || + (SET_WITH_TIMEOUT(set) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + void *x = get_ext(map, e->id); + int ret = mtype_do_test(e, map); + + if (ret <= 0) + return ret; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map))) + return 0; + return 1; +} + +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + void *x = get_ext(map, e->id); + int ret = mtype_do_add(e, map, flags); + + if (ret == IPSET_ADD_FAILED) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map))) + ret = 0; + else if (!(flags & IPSET_FLAG_EXIST)) + return -IPSET_ERR_EXIST; + } + + if (SET_WITH_TIMEOUT(set)) +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); +#else + ip_set_timeout_set(ext_timeout(x, map), ext->timeout); +#endif + + return 0; +} + +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct mtype *map = set->data; + const struct mtype_adt_elem *e = value; + const void *x = get_ext(map, e->id); + + if (mtype_do_del(e, map) || + (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, map)))) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +mtype_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + struct mtype *map = set->data; + struct nlattr *adt, *nested; + void *x; + u32 id, first = cb->args[2]; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EMSGSIZE; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + x = get_ext(map, id); + if (!test_bit(id, map->members) || + (SET_WITH_TIMEOUT(set) && +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + mtype_is_filled((const struct mtype_elem *) x) && +#endif + ip_set_timeout_expired(ext_timeout(x, map)))) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (mtype_do_list(skb, map, id)) + goto nla_put_failure; + if (SET_WITH_TIMEOUT(set)) { +#ifdef IP_SET_BITMAP_STORED_TIMEOUT + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_stored(map, id, + ext_timeout(x, map))))) + goto nla_put_failure; +#else + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get( + ext_timeout(x, map))))) + goto nla_put_failure; +#endif + } + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static void +mtype_gc(unsigned long ul_set) +{ + 
struct ip_set *set = (struct ip_set *) ul_set; + struct mtype *map = set->data; + const void *x; + u32 id; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id < map->elements; id++) + if (mtype_gc_test(id, map)) { + x = get_ext(map, id); + if (ip_set_timeout_expired(ext_timeout(x, map))) + clear_bit(id, map->members); + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static const struct ip_set_type_variant mtype = { + .kadt = mtype_kadt, + .uadt = mtype_uadt, + .adt = { + [IPSET_ADD] = mtype_add, + [IPSET_DEL] = mtype_del, + [IPSET_TEST] = mtype_test, + }, + .destroy = mtype_destroy, + .flush = mtype_flush, + .head = mtype_head, + .list = mtype_list, + .same_set = mtype_same_set, +}; + +#endif /* __IP_SET_BITMAP_IP_GEN_H */ -- cgit v1.2.3-70-g09d2 From 1feab10d7e6ddb5e13d6a4bd93a1b877390262ec Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 8 Apr 2013 21:05:44 +0200 Subject: netfilter: ipset: Unified hash type generation Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_ahash.h | 1241 -------------------------- net/netfilter/ipset/ip_set_hash_gen.h | 1039 +++++++++++++++++++++ 2 files changed, 1039 insertions(+), 1241 deletions(-) delete mode 100644 include/linux/netfilter/ipset/ip_set_ahash.h create mode 100644 net/netfilter/ipset/ip_set_hash_gen.h (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h deleted file mode 100644 index 0214c4c146f..00000000000 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ /dev/null @@ -1,1241 +0,0 @@ -#ifndef _IP_SET_AHASH_H -#define _IP_SET_AHASH_H - -#include -#include -#include - -#define CONCAT(a, b, c) a##b##c -#define TOKEN(a, b, c) CONCAT(a, b, c) - -#define type_pf_next TOKEN(TYPE, PF, _elem) - -/* Hashing which uses arrays to resolve clashing. The hash table is resized - * (doubled) when searching becomes too long. - * Internally jhash is used with the assumption that the size of the - * stored data is a multiple of sizeof(u32). If storage supports timeout, - * the timeout field must be the last one in the data structure - that field - * is ignored when computing the hash key. - * - * Readers and resizing - * - * Resizing can be triggered by userspace command only, and those - * are serialized by the nfnl mutex. During resizing the set is - * read-locked, so the only possible concurrent operations are - * the kernel side readers. Those must be protected by proper RCU locking. - */ - -/* Number of elements to store in an initial array block */ -#define AHASH_INIT_SIZE 4 -/* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) - -/* Max number of elements can be tuned */ -#ifdef IP_SET_HASH_WITH_MULTI -#define AHASH_MAX(h) ((h)->ahash_max) - -static inline u8 -tune_ahash_max(u8 curr, u32 multi) -{ - u32 n; - - if (multi < curr) - return curr; - - n = curr + AHASH_INIT_SIZE; - /* Currently, at listing one hash bucket must fit into a message. - * Therefore we have a hard limit here. - */ - return n > curr && n <= 64 ? 
n : curr; -} -#define TUNE_AHASH_MAX(h, multi) \ - ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) -#else -#define AHASH_MAX(h) AHASH_MAX_SIZE -#define TUNE_AHASH_MAX(h, multi) -#endif - -/* A hash bucket */ -struct hbucket { - void *value; /* the array of the values */ - u8 size; /* size of the array */ - u8 pos; /* position of the first free entry */ -}; - -/* The hash table: the table size stored here in order to make resizing easy */ -struct htable { - u8 htable_bits; /* size of hash table == 2^htable_bits */ - struct hbucket bucket[0]; /* hashtable buckets */ -}; - -#define hbucket(h, i) (&((h)->bucket[i])) - -/* Book-keeping of the prefixes added to the set */ -struct ip_set_hash_nets { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ -}; - -/* The generic ip_set hash structure */ -struct ip_set_hash { - struct htable *table; /* the hash table */ - u32 maxelem; /* max elements in the hash */ - u32 elements; /* current element (vs timeout) */ - u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - struct timer_list gc; /* garbage collection when timeout enabled */ - struct type_pf_next next; /* temporary storage for uadd */ -#ifdef IP_SET_HASH_WITH_MULTI - u8 ahash_max; /* max elements in an array block */ -#endif -#ifdef IP_SET_HASH_WITH_NETMASK - u8 netmask; /* netmask value for subnets to store */ -#endif -#ifdef IP_SET_HASH_WITH_RBTREE - struct rb_root rbtree; -#endif -#ifdef IP_SET_HASH_WITH_NETS - struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */ -#endif -}; - -static size_t -htable_size(u8 hbits) -{ - size_t hsize; - - /* We must fit both into u32 in jhash and size_t */ - if (hbits > 31) - return 0; - hsize = jhash_size(hbits); - if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) - < hsize) - return 0; - - return hsize * sizeof(struct hbucket) + sizeof(struct htable); -} - -/* Compute htable_bits from the user input parameter hashsize */ -static u8 -htable_bits(u32 hashsize) -{ - /* Assume that hashsize == 2^htable_bits */ - u8 bits = fls(hashsize - 1); - if (jhash_size(bits) != hashsize) - /* Round up to the first 2^n value */ - bits = fls(hashsize); - - return bits; -} - -#ifdef IP_SET_HASH_WITH_NETS -#ifdef IP_SET_HASH_WITH_NETS_PACKED -/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr) (cidr + 1) -#else -#define CIDR(cidr) (cidr) -#endif - -#define SET_HOST_MASK(family) (family == AF_INET ? 
32 : 128) -#ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) -#else -#define NETS_LENGTH(family) SET_HOST_MASK(family) -#endif - -/* Network cidr size book keeping when the hash stores different - * sized networks */ -static void -add_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length) -{ - int i, j; - - /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { - if (j != -1) - continue; - else if (h->nets[i].cidr < cidr) - j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; - return; - } - } - if (j != -1) { - for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; - } - } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; -} - -static void -del_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length) -{ - u8 i, j; - - for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) - ; - h->nets[i].nets--; - - if (h->nets[i].nets != 0) - return; - - for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; - } -} -#else -#define NETS_LENGTH(family) 0 -#endif - -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - -/* Calculate the actual memory size of the set data */ -static size_t -ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 nets_length) -{ - u32 i; - struct htable *t = h->table; - size_t memsize = sizeof(*h) - + sizeof(*t) -#ifdef IP_SET_HASH_WITH_NETS - + sizeof(struct ip_set_hash_nets) * nets_length -#endif - + jhash_size(t->htable_bits) * sizeof(struct hbucket); - - for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * dsize; - - return memsize; -} - -/* Flush a hash type of set: destroy all elements */ -static void -ip_set_hash_flush(struct ip_set *set) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) { - n->size = n->pos = 0; - /* FIXME: use slab cache */ - kfree(n->value); - } - } -#ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct ip_set_hash_nets) - * NETS_LENGTH(set->family)); -#endif - h->elements = 0; -} - -/* Destroy a hash type of set */ -static void -ip_set_hash_destroy(struct ip_set *set) -{ - struct ip_set_hash *h = set->data; - - if (with_timeout(h->timeout)) - del_timer_sync(&h->gc); - - ahash_destroy(h->table); -#ifdef IP_SET_HASH_WITH_RBTREE - rbtree_destroy(&h->rbtree); -#endif - kfree(h); - - set->data = NULL; -} - -#endif /* _IP_SET_AHASH_H */ - -#ifndef HKEY_DATALEN -#define HKEY_DATALEN sizeof(struct type_pf_elem) -#endif - -#define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ - & jhash_mask(htable_bits)) - -/* Type/family dependent function prototypes */ - -#define type_pf_data_equal TOKEN(TYPE, PF, _data_equal) -#define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull) -#define type_pf_data_copy TOKEN(TYPE, PF, _data_copy) -#define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out) -#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask) -#define type_pf_data_list TOKEN(TYPE, PF, _data_list) -#define type_pf_data_tlist TOKEN(TYPE, 
PF, _data_tlist) -#define type_pf_data_next TOKEN(TYPE, PF, _data_next) -#define type_pf_data_flags TOKEN(TYPE, PF, _data_flags) -#define type_pf_data_reset_flags TOKEN(TYPE, PF, _data_reset_flags) -#ifdef IP_SET_HASH_WITH_NETS -#define type_pf_data_match TOKEN(TYPE, PF, _data_match) -#else -#define type_pf_data_match(d) 1 -#endif - -#define type_pf_elem TOKEN(TYPE, PF, _elem) -#define type_pf_telem TOKEN(TYPE, PF, _telem) -#define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout) -#define type_pf_data_expired TOKEN(TYPE, PF, _data_expired) -#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set) - -#define type_pf_elem_add TOKEN(TYPE, PF, _elem_add) -#define type_pf_add TOKEN(TYPE, PF, _add) -#define type_pf_del TOKEN(TYPE, PF, _del) -#define type_pf_test_cidrs TOKEN(TYPE, PF, _test_cidrs) -#define type_pf_test TOKEN(TYPE, PF, _test) - -#define type_pf_elem_tadd TOKEN(TYPE, PF, _elem_tadd) -#define type_pf_del_telem TOKEN(TYPE, PF, _ahash_del_telem) -#define type_pf_expire TOKEN(TYPE, PF, _expire) -#define type_pf_tadd TOKEN(TYPE, PF, _tadd) -#define type_pf_tdel TOKEN(TYPE, PF, _tdel) -#define type_pf_ttest_cidrs TOKEN(TYPE, PF, _ahash_ttest_cidrs) -#define type_pf_ttest TOKEN(TYPE, PF, _ahash_ttest) - -#define type_pf_resize TOKEN(TYPE, PF, _resize) -#define type_pf_tresize TOKEN(TYPE, PF, _tresize) -#define type_pf_flush ip_set_hash_flush -#define type_pf_destroy ip_set_hash_destroy -#define type_pf_head TOKEN(TYPE, PF, _head) -#define type_pf_list TOKEN(TYPE, PF, _list) -#define type_pf_tlist TOKEN(TYPE, PF, _tlist) -#define type_pf_same_set TOKEN(TYPE, PF, _same_set) -#define type_pf_kadt TOKEN(TYPE, PF, _kadt) -#define type_pf_uadt TOKEN(TYPE, PF, _uadt) -#define type_pf_gc TOKEN(TYPE, PF, _gc) -#define type_pf_gc_init TOKEN(TYPE, PF, _gc_init) -#define type_pf_variant TOKEN(TYPE, PF, _variant) -#define type_pf_tvariant TOKEN(TYPE, PF, _tvariant) - -/* Flavour without timeout */ - -/* Get the ith element from the array block n */ -#define ahash_data(n, i) \ - ((struct type_pf_elem *)((n)->value) + (i)) - -/* Add an element to the hash table when resizing the set: - * we spare the maintenance of the internal counters. */ -static int -type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value, - u8 ahash_max, u32 cadt_flags) -{ - struct type_pf_elem *data; - - if (n->pos >= n->size) { - void *tmp; - - if (n->size >= ahash_max) - /* Trigger rehashing */ - return -EAGAIN; - - tmp = kzalloc((n->size + AHASH_INIT_SIZE) - * sizeof(struct type_pf_elem), - GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - if (n->size) { - memcpy(tmp, n->value, - sizeof(struct type_pf_elem) * n->size); - kfree(n->value); - } - n->value = tmp; - n->size += AHASH_INIT_SIZE; - } - data = ahash_data(n, n->pos++); - type_pf_data_copy(data, value); -#ifdef IP_SET_HASH_WITH_NETS - /* Resizing won't overwrite stored flags */ - if (cadt_flags) - type_pf_data_flags(data, cadt_flags); -#endif - return 0; -} - -/* Resize a hash: create a new hash table with doubling the hashsize - * and inserting the elements to it. Repeat until we succeed or - * fail due to memory pressures. 
*/ -static int -type_pf_resize(struct ip_set *set, bool retried) -{ - struct ip_set_hash *h = set->data; - struct htable *t, *orig = h->table; - u8 htable_bits = orig->htable_bits; - struct type_pf_elem *data; - struct hbucket *n, *m; - u32 i, j, flags = 0; - int ret; - -retry: - ret = 0; - htable_bits++; - pr_debug("attempt to resize set %s from %u to %u, t %p\n", - set->name, orig->htable_bits, htable_bits, orig); - if (!htable_bits) { - /* In case we have plenty of memory :-) */ - pr_warning("Cannot increase the hashsize of set %s further\n", - set->name); - return -IPSET_ERR_HASH_FULL; - } - t = ip_set_alloc(sizeof(*t) - + jhash_size(htable_bits) * sizeof(struct hbucket)); - if (!t) - return -ENOMEM; - t->htable_bits = htable_bits; - - read_lock_bh(&set->lock); - for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = hbucket(orig, i); - for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j); -#ifdef IP_SET_HASH_WITH_NETS - flags = 0; - type_pf_data_reset_flags(data, &flags); -#endif - m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_add(m, data, AHASH_MAX(h), flags); - if (ret < 0) { -#ifdef IP_SET_HASH_WITH_NETS - type_pf_data_flags(data, flags); -#endif - read_unlock_bh(&set->lock); - ahash_destroy(t); - if (ret == -EAGAIN) - goto retry; - return ret; - } - } - } - - rcu_assign_pointer(h->table, t); - read_unlock_bh(&set->lock); - - /* Give time to other readers of the set */ - synchronize_rcu_bh(); - - pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, - orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); - - return 0; -} - -static inline void -type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d); - -/* Add an element to a hash and update the internal counters when succeeded, - * otherwise report the proper error code. */ -static int -type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct ip_set_hash *h = set->data; - struct htable *t; - const struct type_pf_elem *d = value; - struct hbucket *n; - int i, ret = 0; - u32 key, multi = 0; - u32 cadt_flags = flags >> 16; - - if (h->elements >= h->maxelem) { - if (net_ratelimit()) - pr_warning("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; - } - - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); - key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) - if (type_pf_data_equal(ahash_data(n, i), d, &multi)) { -#ifdef IP_SET_HASH_WITH_NETS - if (flags & IPSET_FLAG_EXIST) - /* Support overwriting just the flags */ - type_pf_data_flags(ahash_data(n, i), - cadt_flags); -#endif - ret = -IPSET_ERR_EXIST; - goto out; - } - TUNE_AHASH_MAX(h, multi); - ret = type_pf_elem_add(n, value, AHASH_MAX(h), cadt_flags); - if (ret != 0) { - if (ret == -EAGAIN) - type_pf_data_next(h, d); - goto out; - } - -#ifdef IP_SET_HASH_WITH_NETS - add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); -#endif - h->elements++; -out: - rcu_read_unlock_bh(); - return ret; -} - -/* Delete an element from the hash: swap it with the last element - * and free up space if possible. 
- */ -static int -type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - const struct type_pf_elem *d = value; - struct hbucket *n; - int i; - struct type_pf_elem *data; - u32 key, multi = 0; - - key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i); - if (!type_pf_data_equal(data, d, &multi)) - continue; - if (i != n->pos - 1) - /* Not last one */ - type_pf_data_copy(data, ahash_data(n, n->pos - 1)); - - n->pos--; - h->elements--; -#ifdef IP_SET_HASH_WITH_NETS - del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); -#endif - if (n->pos + AHASH_INIT_SIZE < n->size) { - void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * sizeof(struct type_pf_elem), - GFP_ATOMIC); - if (!tmp) - return 0; - n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, - n->size * sizeof(struct type_pf_elem)); - kfree(n->value); - n->value = tmp; - } - return 0; - } - - return -IPSET_ERR_EXIST; -} - -#ifdef IP_SET_HASH_WITH_NETS - -/* Special test function which takes into account the different network - * sizes added to the set */ -static int -type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - struct hbucket *n; - const struct type_pf_elem *data; - int i, j = 0; - u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); - - pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - type_pf_data_netmask(d, h->nets[j].cidr); - key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i); - if (type_pf_data_equal(data, d, &multi)) - return type_pf_data_match(data); - } - } - return 0; -} -#endif - -/* Test whether the element is added to the set */ -static int -type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - struct type_pf_elem *d = value; - struct hbucket *n; - const struct type_pf_elem *data; - int i; - u32 key, multi = 0; - -#ifdef IP_SET_HASH_WITH_NETS - /* If we test an IP address and not a network address, - * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return type_pf_test_cidrs(set, d, timeout); -#endif - - key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i); - if (type_pf_data_equal(data, d, &multi)) - return type_pf_data_match(data); - } - return 0; -} - -/* Reply a HEADER request: fill out the header part of the set */ -static int -type_pf_head(struct ip_set *set, struct sk_buff *skb) -{ - const struct ip_set_hash *h = set->data; - struct nlattr *nested; - size_t memsize; - - read_lock_bh(&set->lock); - memsize = ahash_memsize(h, with_timeout(h->timeout) - ? 
sizeof(struct type_pf_telem) - : sizeof(struct type_pf_elem), - NETS_LENGTH(set->family)); - read_unlock_bh(&set->lock); - - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) - goto nla_put_failure; - if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(h->table->htable_bits))) || - nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) - goto nla_put_failure; -#ifdef IP_SET_HASH_WITH_NETMASK - if (h->netmask != HOST_MASK && - nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) - goto nla_put_failure; -#endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - (with_timeout(h->timeout) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)))) - goto nla_put_failure; - ipset_nest_end(skb, nested); - - return 0; -nla_put_failure: - return -EMSGSIZE; -} - -/* Reply a LIST/SAVE request: dump the elements of the specified set */ -static int -type_pf_list(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) -{ - const struct ip_set_hash *h = set->data; - const struct htable *t = h->table; - struct nlattr *atd, *nested; - const struct hbucket *n; - const struct type_pf_elem *data; - u32 first = cb->args[2]; - /* We assume that one hash bucket fills into one page */ - void *incomplete; - int i; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { - incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); - for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i); - pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, data); - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (cb->args[2] == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (type_pf_data_list(skb, data)) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); - if (unlikely(first == cb->args[2])) { - pr_warning("Can't list set %s: one bucket does not fit into " - "a message. 
Please report it!\n", set->name); - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; -} - -static int -type_pf_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, const struct ip_set_adt_opt *opt); -static int -type_pf_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); - -static const struct ip_set_type_variant type_pf_variant = { - .kadt = type_pf_kadt, - .uadt = type_pf_uadt, - .adt = { - [IPSET_ADD] = type_pf_add, - [IPSET_DEL] = type_pf_del, - [IPSET_TEST] = type_pf_test, - }, - .destroy = type_pf_destroy, - .flush = type_pf_flush, - .head = type_pf_head, - .list = type_pf_list, - .resize = type_pf_resize, - .same_set = type_pf_same_set, -}; - -/* Flavour with timeout support */ - -#define ahash_tdata(n, i) \ - (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i)) - -static inline u32 -type_pf_data_timeout(const struct type_pf_elem *data) -{ - const struct type_pf_telem *tdata = - (const struct type_pf_telem *) data; - - return tdata->timeout; -} - -static inline bool -type_pf_data_expired(const struct type_pf_elem *data) -{ - const struct type_pf_telem *tdata = - (const struct type_pf_telem *) data; - - return ip_set_timeout_expired(tdata->timeout); -} - -static inline void -type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout) -{ - struct type_pf_telem *tdata = (struct type_pf_telem *) data; - - tdata->timeout = ip_set_timeout_set(timeout); -} - -static int -type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value, - u8 ahash_max, u32 cadt_flags, u32 timeout) -{ - struct type_pf_elem *data; - - if (n->pos >= n->size) { - void *tmp; - - if (n->size >= ahash_max) - /* Trigger rehashing */ - return -EAGAIN; - - tmp = kzalloc((n->size + AHASH_INIT_SIZE) - * sizeof(struct type_pf_telem), - GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - if (n->size) { - memcpy(tmp, n->value, - sizeof(struct type_pf_telem) * n->size); - kfree(n->value); - } - n->value = tmp; - n->size += AHASH_INIT_SIZE; - } - data = ahash_tdata(n, n->pos++); - type_pf_data_copy(data, value); - type_pf_data_timeout_set(data, timeout); -#ifdef IP_SET_HASH_WITH_NETS - /* Resizing won't overwrite stored flags */ - if (cadt_flags) - type_pf_data_flags(data, cadt_flags); -#endif - return 0; -} - -/* Delete expired elements from the hashtable */ -static void -type_pf_expire(struct ip_set_hash *h, u8 nets_length) -{ - struct htable *t = h->table; - struct hbucket *n; - struct type_pf_elem *data; - u32 i; - int j; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - for (j = 0; j < n->pos; j++) { - data = ahash_tdata(n, j); - if (type_pf_data_expired(data)) { - pr_debug("expired %u/%u\n", i, j); -#ifdef IP_SET_HASH_WITH_NETS - del_cidr(h, CIDR(data->cidr), nets_length); -#endif - if (j != n->pos - 1) - /* Not last one */ - type_pf_data_copy(data, - ahash_tdata(n, n->pos - 1)); - n->pos--; - h->elements--; - } - } - if (n->pos + AHASH_INIT_SIZE < n->size) { - void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * sizeof(struct type_pf_telem), - GFP_ATOMIC); - if (!tmp) - /* Still try to delete expired elements */ - continue; - n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, - n->size * sizeof(struct type_pf_telem)); - kfree(n->value); - n->value = tmp; - } - } -} - -static int -type_pf_tresize(struct ip_set *set, bool retried) -{ - struct ip_set_hash *h = set->data; - struct htable *t, *orig = h->table; - u8 htable_bits = orig->htable_bits; - struct 
type_pf_elem *data; - struct hbucket *n, *m; - u32 i, j, flags = 0; - int ret; - - /* Try to cleanup once */ - if (!retried) { - i = h->elements; - write_lock_bh(&set->lock); - type_pf_expire(set->data, NETS_LENGTH(set->family)); - write_unlock_bh(&set->lock); - if (h->elements < i) - return 0; - } - -retry: - ret = 0; - htable_bits++; - pr_debug("attempt to resize set %s from %u to %u, t %p\n", - set->name, orig->htable_bits, htable_bits, orig); - if (!htable_bits) { - /* In case we have plenty of memory :-) */ - pr_warning("Cannot increase the hashsize of set %s further\n", - set->name); - return -IPSET_ERR_HASH_FULL; - } - t = ip_set_alloc(sizeof(*t) - + jhash_size(htable_bits) * sizeof(struct hbucket)); - if (!t) - return -ENOMEM; - t->htable_bits = htable_bits; - - read_lock_bh(&set->lock); - for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = hbucket(orig, i); - for (j = 0; j < n->pos; j++) { - data = ahash_tdata(n, j); -#ifdef IP_SET_HASH_WITH_NETS - flags = 0; - type_pf_data_reset_flags(data, &flags); -#endif - m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), flags, - ip_set_timeout_get(type_pf_data_timeout(data))); - if (ret < 0) { -#ifdef IP_SET_HASH_WITH_NETS - type_pf_data_flags(data, flags); -#endif - read_unlock_bh(&set->lock); - ahash_destroy(t); - if (ret == -EAGAIN) - goto retry; - return ret; - } - } - } - - rcu_assign_pointer(h->table, t); - read_unlock_bh(&set->lock); - - /* Give time to other readers of the set */ - synchronize_rcu_bh(); - - ahash_destroy(orig); - - return 0; -} - -static int -type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - const struct type_pf_elem *d = value; - struct hbucket *n; - struct type_pf_elem *data; - int ret = 0, i, j = AHASH_MAX(h) + 1; - bool flag_exist = flags & IPSET_FLAG_EXIST; - u32 key, multi = 0; - u32 cadt_flags = flags >> 16; - - if (h->elements >= h->maxelem) - /* FIXME: when set is full, we slow down here */ - type_pf_expire(h, NETS_LENGTH(set->family)); - if (h->elements >= h->maxelem) { - if (net_ratelimit()) - pr_warning("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; - } - - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); - key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_tdata(n, i); - if (type_pf_data_equal(data, d, &multi)) { - if (type_pf_data_expired(data) || flag_exist) - /* Just timeout value may be updated */ - j = i; - else { - ret = -IPSET_ERR_EXIST; - goto out; - } - } else if (j == AHASH_MAX(h) + 1 && - type_pf_data_expired(data)) - j = i; - } - if (j != AHASH_MAX(h) + 1) { - data = ahash_tdata(n, j); -#ifdef IP_SET_HASH_WITH_NETS - del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); -#endif - type_pf_data_copy(data, d); - type_pf_data_timeout_set(data, timeout); -#ifdef IP_SET_HASH_WITH_NETS - type_pf_data_flags(data, cadt_flags); -#endif - goto out; - } - TUNE_AHASH_MAX(h, multi); - ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), cadt_flags, timeout); - if (ret != 0) { - if (ret == -EAGAIN) - type_pf_data_next(h, d); - goto out; - } - -#ifdef IP_SET_HASH_WITH_NETS - add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); -#endif - h->elements++; -out: - rcu_read_unlock_bh(); - return ret; -} - -static int -type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - 
struct ip_set_hash *h = set->data; - struct htable *t = h->table; - const struct type_pf_elem *d = value; - struct hbucket *n; - int i; - struct type_pf_elem *data; - u32 key, multi = 0; - - key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_tdata(n, i); - if (!type_pf_data_equal(data, d, &multi)) - continue; - if (type_pf_data_expired(data)) - return -IPSET_ERR_EXIST; - if (i != n->pos - 1) - /* Not last one */ - type_pf_data_copy(data, ahash_tdata(n, n->pos - 1)); - - n->pos--; - h->elements--; -#ifdef IP_SET_HASH_WITH_NETS - del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); -#endif - if (n->pos + AHASH_INIT_SIZE < n->size) { - void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * sizeof(struct type_pf_telem), - GFP_ATOMIC); - if (!tmp) - return 0; - n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, - n->size * sizeof(struct type_pf_telem)); - kfree(n->value); - n->value = tmp; - } - return 0; - } - - return -IPSET_ERR_EXIST; -} - -#ifdef IP_SET_HASH_WITH_NETS -static int -type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - struct type_pf_elem *data; - struct hbucket *n; - int i, j = 0; - u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); - - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - type_pf_data_netmask(d, h->nets[j].cidr); - key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_tdata(n, i); -#ifdef IP_SET_HASH_WITH_MULTI - if (type_pf_data_equal(data, d, &multi)) { - if (!type_pf_data_expired(data)) - return type_pf_data_match(data); - multi = 0; - } -#else - if (type_pf_data_equal(data, d, &multi) && - !type_pf_data_expired(data)) - return type_pf_data_match(data); -#endif - } - } - return 0; -} -#endif - -static int -type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ - struct ip_set_hash *h = set->data; - struct htable *t = h->table; - struct type_pf_elem *data, *d = value; - struct hbucket *n; - int i; - u32 key, multi = 0; - -#ifdef IP_SET_HASH_WITH_NETS - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return type_pf_ttest_cidrs(set, d, timeout); -#endif - key = HKEY(d, h->initval, t->htable_bits); - n = hbucket(t, key); - for (i = 0; i < n->pos; i++) { - data = ahash_tdata(n, i); - if (type_pf_data_equal(data, d, &multi) && - !type_pf_data_expired(data)) - return type_pf_data_match(data); - } - return 0; -} - -static int -type_pf_tlist(const struct ip_set *set, - struct sk_buff *skb, struct netlink_callback *cb) -{ - const struct ip_set_hash *h = set->data; - const struct htable *t = h->table; - struct nlattr *atd, *nested; - const struct hbucket *n; - const struct type_pf_elem *data; - u32 first = cb->args[2]; - /* We assume that one hash bucket fills into one page */ - void *incomplete; - int i; - - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); - if (!atd) - return -EMSGSIZE; - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { - incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - for (i = 0; i < n->pos; i++) { - data = ahash_tdata(n, i); - pr_debug("list %p %u\n", n, i); - if (type_pf_data_expired(data)) - continue; - pr_debug("do list %p %u\n", n, i); - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (cb->args[2] == first) { - nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; - } - if (type_pf_data_tlist(skb, 
data)) - goto nla_put_failure; - ipset_nest_end(skb, nested); - } - } - ipset_nest_end(skb, atd); - /* Set listing finished */ - cb->args[2] = 0; - - return 0; - -nla_put_failure: - nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); - if (unlikely(first == cb->args[2])) { - pr_warning("Can't list set %s: one bucket does not fit into " - "a message. Please report it!\n", set->name); - cb->args[2] = 0; - return -EMSGSIZE; - } - return 0; -} - -static const struct ip_set_type_variant type_pf_tvariant = { - .kadt = type_pf_kadt, - .uadt = type_pf_uadt, - .adt = { - [IPSET_ADD] = type_pf_tadd, - [IPSET_DEL] = type_pf_tdel, - [IPSET_TEST] = type_pf_ttest, - }, - .destroy = type_pf_destroy, - .flush = type_pf_flush, - .head = type_pf_head, - .list = type_pf_tlist, - .resize = type_pf_tresize, - .same_set = type_pf_same_set, -}; - -static void -type_pf_gc(unsigned long ul_set) -{ - struct ip_set *set = (struct ip_set *) ul_set; - struct ip_set_hash *h = set->data; - - pr_debug("called\n"); - write_lock_bh(&set->lock); - type_pf_expire(h, NETS_LENGTH(set->family)); - write_unlock_bh(&set->lock); - - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; - add_timer(&h->gc); -} - -static void -type_pf_gc_init(struct ip_set *set) -{ - struct ip_set_hash *h = set->data; - - init_timer(&h->gc); - h->gc.data = (unsigned long) set; - h->gc.function = type_pf_gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; - add_timer(&h->gc); - pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); -} - -#undef HKEY_DATALEN -#undef HKEY -#undef type_pf_data_equal -#undef type_pf_data_isnull -#undef type_pf_data_copy -#undef type_pf_data_zero_out -#undef type_pf_data_netmask -#undef type_pf_data_list -#undef type_pf_data_tlist -#undef type_pf_data_next -#undef type_pf_data_flags -#undef type_pf_data_reset_flags -#undef type_pf_data_match - -#undef type_pf_elem -#undef type_pf_telem -#undef type_pf_data_timeout -#undef type_pf_data_expired -#undef type_pf_data_timeout_set - -#undef type_pf_elem_add -#undef type_pf_add -#undef type_pf_del -#undef type_pf_test_cidrs -#undef type_pf_test - -#undef type_pf_elem_tadd -#undef type_pf_del_telem -#undef type_pf_expire -#undef type_pf_tadd -#undef type_pf_tdel -#undef type_pf_ttest_cidrs -#undef type_pf_ttest - -#undef type_pf_resize -#undef type_pf_tresize -#undef type_pf_flush -#undef type_pf_destroy -#undef type_pf_head -#undef type_pf_list -#undef type_pf_tlist -#undef type_pf_same_set -#undef type_pf_kadt -#undef type_pf_uadt -#undef type_pf_gc -#undef type_pf_gc_init -#undef type_pf_variant -#undef type_pf_tvariant diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h new file mode 100644 index 00000000000..2ba7d4e76cd --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -0,0 +1,1039 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _IP_SET_HASH_GEN_H +#define _IP_SET_HASH_GEN_H + +#include +#include +#include +#ifndef rcu_dereference_bh +#define rcu_dereference_bh(p) rcu_dereference(p) +#endif + +#define CONCAT(a, b) a##b +#define TOKEN(a, b) CONCAT(a, b) + +/* Hashing which uses arrays to resolve clashing. The hash table is resized + * (doubled) when searching becomes too long. 
+ * Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. + * + * Readers and resizing + * + * Resizing can be triggered by userspace command only, and those + * are serialized by the nfnl mutex. During resizing the set is + * read-locked, so the only possible concurrent operations are + * the kernel side readers. Those must be protected by proper RCU locking. + */ + +/* Number of elements to store in an initial array block */ +#define AHASH_INIT_SIZE 4 +/* Max number of elements to store in an array block */ +#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) + +/* Max number of elements can be tuned */ +#ifdef IP_SET_HASH_WITH_MULTI +#define AHASH_MAX(h) ((h)->ahash_max) + +static inline u8 +tune_ahash_max(u8 curr, u32 multi) +{ + u32 n; + + if (multi < curr) + return curr; + + n = curr + AHASH_INIT_SIZE; + /* Currently, at listing one hash bucket must fit into a message. + * Therefore we have a hard limit here. + */ + return n > curr && n <= 64 ? n : curr; +} +#define TUNE_AHASH_MAX(h, multi) \ + ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) +#else +#define AHASH_MAX(h) AHASH_MAX_SIZE +#define TUNE_AHASH_MAX(h, multi) +#endif + +/* A hash bucket */ +struct hbucket { + void *value; /* the array of the values */ + u8 size; /* size of the array */ + u8 pos; /* position of the first free entry */ +}; + +/* The hash table: the table size stored here in order to make resizing easy */ +struct htable { + u8 htable_bits; /* size of hash table == 2^htable_bits */ + struct hbucket bucket[0]; /* hashtable buckets */ +}; + +#define hbucket(h, i) (&((h)->bucket[i])) + +/* Book-keeping of the prefixes added to the set */ +struct net_prefixes { + u8 cidr; /* the different cidr values in the set */ + u32 nets; /* number of elements per cidr */ +}; + +/* Compute the hash table size */ +static size_t +htable_size(u8 hbits) +{ + size_t hsize; + + /* We must fit both into u32 in jhash and size_t */ + if (hbits > 31) + return 0; + hsize = jhash_size(hbits); + if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) + < hsize) + return 0; + + return hsize * sizeof(struct hbucket) + sizeof(struct htable); +} + +/* Compute htable_bits from the user input parameter hashsize */ +static u8 +htable_bits(u32 hashsize) +{ + /* Assume that hashsize == 2^htable_bits */ + u8 bits = fls(hashsize - 1); + if (jhash_size(bits) != hashsize) + /* Round up to the first 2^n value */ + bits = fls(hashsize); + + return bits; +} + +/* Destroy the hashtable part of the set */ +static void +ahash_destroy(struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) + /* FIXME: use slab cache */ + kfree(n->value); + } + + ip_set_free(t); +} + +static int +hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) +{ + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= ahash_max) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, n->size * dsize); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + return 0; +} + +#ifdef IP_SET_HASH_WITH_NETS +#ifdef IP_SET_HASH_WITH_NETS_PACKED +/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ +#define CIDR(cidr) 
(cidr + 1) +#else +#define CIDR(cidr) (cidr) +#endif + +#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) + +#ifdef IP_SET_HASH_WITH_MULTI +#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#else +#define NETS_LENGTH(family) SET_HOST_MASK(family) +#endif + +#else +#define NETS_LENGTH(family) 0 +#endif /* IP_SET_HASH_WITH_NETS */ + +#define ext_timeout(e, h) \ +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) + +#endif /* _IP_SET_HASH_GEN_H */ + +/* Family dependent templates */ + +#undef ahash_data +#undef mtype_data_equal +#undef mtype_do_data_match +#undef mtype_data_set_flags +#undef mtype_data_reset_flags +#undef mtype_data_netmask +#undef mtype_data_list +#undef mtype_data_next +#undef mtype_elem + +#undef mtype_add_cidr +#undef mtype_del_cidr +#undef mtype_ahash_memsize +#undef mtype_flush +#undef mtype_destroy +#undef mtype_gc_init +#undef mtype_same_set +#undef mtype_kadt +#undef mtype_uadt +#undef mtype + +#undef mtype_add +#undef mtype_del +#undef mtype_test_cidrs +#undef mtype_test +#undef mtype_expire +#undef mtype_resize +#undef mtype_head +#undef mtype_list +#undef mtype_gc +#undef mtype_gc_init +#undef mtype_variant +#undef mtype_data_match + +#undef HKEY + +#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#ifdef IP_SET_HASH_WITH_NETS +#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#else +#define mtype_do_data_match(d) 1 +#endif +#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) +#define mtype_data_list TOKEN(MTYPE, _data_list) +#define mtype_data_next TOKEN(MTYPE, _data_next) +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush TOKEN(MTYPE, _flush) +#define mtype_destroy TOKEN(MTYPE, _destroy) +#define mtype_gc_init TOKEN(MTYPE, _gc_init) +#define mtype_same_set TOKEN(MTYPE, _same_set) +#define mtype_kadt TOKEN(MTYPE, _kadt) +#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype MTYPE + +#define mtype_elem TOKEN(MTYPE, _elem) +#define mtype_add TOKEN(MTYPE, _add) +#define mtype_del TOKEN(MTYPE, _del) +#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) +#define mtype_test TOKEN(MTYPE, _test) +#define mtype_expire TOKEN(MTYPE, _expire) +#define mtype_resize TOKEN(MTYPE, _resize) +#define mtype_head TOKEN(MTYPE, _head) +#define mtype_list TOKEN(MTYPE, _list) +#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_variant TOKEN(MTYPE, _variant) +#define mtype_data_match TOKEN(MTYPE, _data_match) + +#ifndef HKEY_DATALEN +#define HKEY_DATALEN sizeof(struct mtype_elem) +#endif + +#define HKEY(data, initval, htable_bits) \ +(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ + & jhash_mask(htable_bits)) + +#ifndef htype +#define htype HTYPE + +/* The generic hash structure */ +struct htype { + struct htable *table; /* the hash table */ + u32 maxelem; /* max elements in the hash */ + u32 elements; /* current element (vs timeout) */ + u32 initval; /* random jhash init value */ + u32 timeout; /* timeout value, if enabled */ + size_t dsize; /* data struct size */ + size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collection when timeout enabled */ + struct mtype_elem next; /* temporary storage for uadd */ +#ifdef IP_SET_HASH_WITH_MULTI + u8 ahash_max; /* max elements in an array block */ +#endif 
+#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; /* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_RBTREE + struct rb_root rbtree; +#endif +#ifdef IP_SET_HASH_WITH_NETS + struct net_prefixes nets[0]; /* book-keeping of prefixes */ +#endif +}; +#endif + +#ifdef IP_SET_HASH_WITH_NETS +/* Network cidr size book keeping when the hash stores different + * sized networks */ +static void +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ + int i, j; + + /* Add in increasing prefix order, so larger cidr first */ + for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + if (j != -1) + continue; + else if (h->nets[i].cidr < cidr) + j = i; + else if (h->nets[i].cidr == cidr) { + h->nets[i].nets++; + return; + } + } + if (j != -1) { + for (; i > j; i--) { + h->nets[i].cidr = h->nets[i - 1].cidr; + h->nets[i].nets = h->nets[i - 1].nets; + } + } + h->nets[i].cidr = cidr; + h->nets[i].nets = 1; +} + +static void +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ + u8 i, j; + + for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) + ; + h->nets[i].nets--; + + if (h->nets[i].nets != 0) + return; + + for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { + h->nets[j].cidr = h->nets[j + 1].cidr; + h->nets[j].nets = h->nets[j + 1].nets; + } +} +#endif + +/* Calculate the actual memory size of the set data */ +static size_t +mtype_ahash_memsize(const struct htype *h, u8 nets_length) +{ + u32 i; + struct htable *t = h->table; + size_t memsize = sizeof(*h) + + sizeof(*t) +#ifdef IP_SET_HASH_WITH_NETS + + sizeof(struct net_prefixes) * nets_length +#endif + + jhash_size(t->htable_bits) * sizeof(struct hbucket); + + for (i = 0; i < jhash_size(t->htable_bits); i++) + memsize += t->bucket[i].size * h->dsize; + + return memsize; +} + +/* Flush a hash type of set: destroy all elements */ +static void +mtype_flush(struct ip_set *set) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + n->size = n->pos = 0; + /* FIXME: use slab cache */ + kfree(n->value); + } + } +#ifdef IP_SET_HASH_WITH_NETS + memset(h->nets, 0, sizeof(struct net_prefixes) + * NETS_LENGTH(set->family)); +#endif + h->elements = 0; +} + +/* Destroy a hash type of set */ +static void +mtype_destroy(struct ip_set *set) +{ + struct htype *h = set->data; + + if (set->extensions & IPSET_EXT_TIMEOUT) + del_timer_sync(&h->gc); + + ahash_destroy(h->table); +#ifdef IP_SET_HASH_WITH_RBTREE + rbtree_destroy(&h->rbtree); +#endif + kfree(h); + + set->data = NULL; +} + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ + struct htype *h = set->data; + + init_timer(&h->gc); + h->gc.data = (unsigned long) set; + h->gc.function = gc; + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); + pr_debug("gc initialized, run in every %u\n", + IPSET_GC_PERIOD(h->timeout)); +} + +static bool +mtype_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct htype *x = a->data; + const struct htype *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout && +#ifdef IP_SET_HASH_WITH_NETMASK + x->netmask == y->netmask && +#endif + a->extensions == b->extensions; +} + +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +/* Delete expired 
elements from the hashtable */ +static void +mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +{ + struct htable *t = h->table; + struct hbucket *n; + struct mtype_elem *data; + u32 i; + int j; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j, dsize); + if (ip_set_timeout_expired(ext_timeout(data, h))) { + pr_debug("expired %u/%u\n", i, j); +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(data->cidr), + nets_length); +#endif + if (j != n->pos - 1) + /* Not last one */ + memcpy(data, + ahash_data(n, n->pos - 1, dsize), + dsize); + n->pos--; + h->elements--; + } + } + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * dsize, + GFP_ATOMIC); + if (!tmp) + /* Still try to delete expired elements */ + continue; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, n->size * dsize); + kfree(n->value); + n->value = tmp; + } + } +} + +static void +mtype_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct htype *h = set->data; + + pr_debug("called\n"); + write_lock_bh(&set->lock); + mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + write_unlock_bh(&set->lock); + + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); +} + +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. */ +static int +mtype_resize(struct ip_set *set, bool retried) +{ + struct htype *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; +#ifdef IP_SET_HASH_WITH_NETS + u8 flags; +#endif + struct mtype_elem *data; + struct mtype_elem *d; + struct hbucket *n, *m; + u32 i, j; + int ret; + + /* Try to cleanup once */ + if (SET_WITH_TIMEOUT(set) && !retried) { + i = h->elements; + write_lock_bh(&set->lock); + mtype_expire(set->data, NETS_LENGTH(set->family), + h->dsize); + write_unlock_bh(&set->lock); + if (h->elements < i) + return 0; + } + +retry: + ret = 0; + htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); + if (!htable_bits) { + /* In case we have plenty of memory :-) */ + pr_warning("Cannot increase the hashsize of set %s further\n", + set->name); + return -IPSET_ERR_HASH_FULL; + } + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + mtype_data_reset_flags(data, &flags); +#endif + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_reset_flags(data, &flags); +#endif + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + d = ahash_data(m, m->pos++, h->dsize); + memcpy(d, data, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_reset_flags(d, &flags); +#endif + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, + orig->htable_bits, orig, t->htable_bits, t); + 
ahash_destroy(orig); + + return 0; +} + +/* Add an element to a hash and update the internal counters when succeeded, + * otherwise report the proper error code. */ +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t; + const struct mtype_elem *d = value; + struct mtype_elem *data; + struct hbucket *n; + int i, ret = 0; + int j = AHASH_MAX(h) + 1; + bool flag_exist = flags & IPSET_FLAG_EXIST; + u32 key, multi = 0; + + if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + + if (h->elements >= h->maxelem) { + if (net_ratelimit()) + pr_warning("Set %s is full, maxelem %u reached\n", + set->name, h->maxelem); + return -IPSET_ERR_HASH_FULL; + } + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (mtype_data_equal(data, d, &multi)) { + if (flag_exist || + (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)))) { + /* Just the extensions could be overwritten */ + j = i; + goto reuse_slot; + } else { + ret = -IPSET_ERR_EXIST; + goto out; + } + } + /* Reuse first timed out entry */ + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)) && + j != AHASH_MAX(h) + 1) + j = i; + } +reuse_slot: + if (j != AHASH_MAX(h) + 1) { + /* Fill out reused slot */ + data = ahash_data(n, j, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); + mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + } else { + /* Use/create a new slot */ + TUNE_AHASH_MAX(h, multi); + ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + if (ret != 0) { + if (ret == -EAGAIN) + mtype_data_next(&h->next, d); + goto out; + } + data = ahash_data(n, n->pos++, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS + mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + h->elements++; + } + memcpy(data, d, sizeof(struct mtype_elem)); +#ifdef IP_SET_HASH_WITH_NETS + mtype_data_set_flags(data, flags); +#endif + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + +out: + rcu_read_unlock_bh(); + return ret; +} + +/* Delete an element from the hash: swap it with the last element + * and free up space if possible. 
+ */ +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + const struct mtype_elem *d = value; + struct mtype_elem *data; + struct hbucket *n; + int i; + u32 key, multi = 0; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (!mtype_data_equal(data, d, &multi)) + continue; + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h))) + return -IPSET_ERR_EXIST; + if (i != n->pos - 1) + /* Not last one */ + memcpy(data, ahash_data(n, n->pos - 1, h->dsize), + h->dsize); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * h->dsize, + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, n->size * h->dsize); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +static inline int +mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, + struct ip_set_ext *mext, struct ip_set *set, u32 flags) +{ + return mtype_do_data_match(data); +} + +#ifdef IP_SET_HASH_WITH_NETS +/* Special test function which takes into account the different network + * sizes added to the set */ +static int +mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + struct mtype_elem *data; + int i, j = 0; + u32 key, multi = 0; + u8 nets_length = NETS_LENGTH(set->family); + + pr_debug("test by nets\n"); + for (; j < nets_length && h->nets[j].nets && !multi; j++) { + mtype_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (!mtype_data_equal(data, d, &multi)) + continue; + if (SET_WITH_TIMEOUT(set)) { + if (!ip_set_timeout_expired( + ext_timeout(data, h))) + return mtype_data_match(data, ext, + mext, set, + flags); +#ifdef IP_SET_HASH_WITH_MULTI + multi = 0; +#endif + } else + return mtype_data_match(data, ext, + mext, set, flags); + } + } + return 0; +} +#endif + +/* Test whether the element is added to the set */ +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + struct htype *h = set->data; + struct htable *t = h->table; + struct mtype_elem *d = value; + struct hbucket *n; + struct mtype_elem *data; + int i; + u32 key, multi = 0; + +#ifdef IP_SET_HASH_WITH_NETS + /* If we test an IP address and not a network address, + * try all possible network sizes */ + if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) + return mtype_test_cidrs(set, d, ext, mext, flags); +#endif + + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i, h->dsize); + if (mtype_data_equal(data, d, &multi) && + !(SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, h)))) + return mtype_data_match(data, ext, mext, set, flags); + } + return 0; +} + +/* Reply a HEADER request: fill out the header part of the set */ +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct htype *h = 
set->data; + struct nlattr *nested; + size_t memsize; + + read_lock_bh(&set->lock); + memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); + read_unlock_bh(&set->lock); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, + htonl(jhash_size(h->table->htable_bits))) || + nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) + goto nla_put_failure; +#ifdef IP_SET_HASH_WITH_NETMASK + if (h->netmask != HOST_MASK && + nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) + goto nla_put_failure; +#endif + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + ((set->extensions & IPSET_EXT_TIMEOUT) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +/* Reply a LIST/SAVE request: dump the elements of the specified set */ +static int +mtype_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct htype *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct mtype_elem *e; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + pr_debug("list hash set %s\n", set->name); + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + for (i = 0; i < n->pos; i++) { + e = ahash_data(n, i, h->dsize); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, h))) + continue; + pr_debug("list hash %lu hbucket %p i %u, data %p\n", + cb->args[2], n, i, e); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (mtype_data_list(skb, e)) + goto nla_put_failure; + if (SET_WITH_TIMEOUT(set) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get( + ext_timeout(e, h))))) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. 
Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); + +static int +TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + +static const struct ip_set_type_variant mtype_variant = { + .kadt = mtype_kadt, + .uadt = mtype_uadt, + .adt = { + [IPSET_ADD] = mtype_add, + [IPSET_DEL] = mtype_del, + [IPSET_TEST] = mtype_test, + }, + .destroy = mtype_destroy, + .flush = mtype_flush, + .head = mtype_head, + .list = mtype_list, + .resize = mtype_resize, + .same_set = mtype_same_set, +}; + +#ifdef IP_SET_EMIT_CREATE +static int +TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; +#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; +#endif + size_t hsize; + struct HTYPE *h; + + if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) + return -IPSET_ERR_INVALID_FAMILY; +#ifdef IP_SET_HASH_WITH_NETMASK + netmask = set->family == NFPROTO_IPV4 ? 32 : 128; + pr_debug("Create set %s with family %s\n", + set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); +#endif + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + +#ifdef IP_SET_HASH_WITH_NETMASK + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if ((set->family == NFPROTO_IPV4 && netmask > 32) || + (set->family == NFPROTO_IPV6 && netmask > 128) || + netmask == 0) + return -IPSET_ERR_INVALID_NETMASK; + } +#endif + + hsize = sizeof(*h); +#ifdef IP_SET_HASH_WITH_NETS + hsize += sizeof(struct net_prefixes) * + (set->family == NFPROTO_IPV4 ? 
32 : 128); +#endif + h = kzalloc(hsize, GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; +#ifdef IP_SET_HASH_WITH_NETMASK + h->netmask = netmask; +#endif + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + if (set->family == NFPROTO_IPV4) + set->variant = &TOKEN(HTYPE, 4_variant); + else + set->variant = &TOKEN(HTYPE, 6_variant); + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->extensions |= IPSET_EXT_TIMEOUT; + if (set->family == NFPROTO_IPV4) { + h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 4t_elem), + timeout); + TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); + } else { + h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); + h->offset[IPSET_OFFSET_TIMEOUT] = + offsetof(struct TOKEN(HTYPE, 6t_elem), + timeout); + TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); + } + } else { + if (set->family == NFPROTO_IPV4) + h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + else + h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} +#endif /* IP_SET_EMIT_CREATE */ -- cgit v1.2.3-70-g09d2 From 34d666d489cf70c246ca99b2387741915c34b88c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Apr 2013 14:38:56 +0200 Subject: netfilter: ipset: Introduce the counter extension in the core Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 75 +++++++++++++++++++++++++++-- include/uapi/linux/netfilter/ipset/ip_set.h | 5 ++ net/netfilter/ipset/ip_set_core.c | 10 ++++ 3 files changed, 86 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index bf0220cbf46..0f978ebfaef 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -52,18 +52,24 @@ enum ip_set_extension { IPSET_EXT_NONE = 0, IPSET_EXT_BIT_TIMEOUT = 1, IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), + IPSET_EXT_BIT_COUNTER = 2, + IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), }; /* Extension offsets */ enum ip_set_offset { IPSET_OFFSET_TIMEOUT = 0, + IPSET_OFFSET_COUNTER, IPSET_OFFSET_MAX, }; #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) +#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) struct ip_set_ext { unsigned long timeout; + u64 packets; + u64 bytes; }; struct ip_set; @@ -177,6 +183,65 @@ struct ip_set { void *data; }; +struct ip_set_counter { + atomic64_t bytes; + atomic64_t packets; +}; + +static inline void +ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) +{ + atomic64_add((long long)bytes, &(counter)->bytes); +} + +static inline void +ip_set_add_packets(u64 packets, struct ip_set_counter *counter) +{ + atomic64_add((long long)packets, &(counter)->packets); +} + +static inline u64 +ip_set_get_bytes(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->bytes); +} + +static inline u64 +ip_set_get_packets(const struct ip_set_counter *counter) +{ + return 
(u64)atomic64_read(&(counter)->packets); +} + +static inline void +ip_set_update_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + if (ext->packets != ULLONG_MAX) { + ip_set_add_bytes(ext->bytes, counter); + ip_set_add_packets(ext->packets, counter); + } +} + +static inline bool +ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter))) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter))); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); extern void ip_set_put_byindex(ip_set_id_t index); @@ -318,10 +383,12 @@ bitmap_bytes(u32 a, u32 b) #include -#define IP_SET_INIT_KEXT(skb, opt, map) \ - { .timeout = ip_set_adt_opt_timeout(opt, map) } +#define IP_SET_INIT_KEXT(skb, opt, map) \ + { .bytes = (skb)->len, .packets = 1, \ + .timeout = ip_set_adt_opt_timeout(opt, map) } -#define IP_SET_INIT_UEXT(map) \ - { .timeout = (map)->timeout } +#define IP_SET_INIT_UEXT(map) \ + { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ + .timeout = (map)->timeout } #endif /*_IP_SET_H */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index fbee42807a1..ed452675d15 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -108,6 +108,8 @@ enum { IPSET_ATTR_CIDR2, IPSET_ATTR_IP2_TO, IPSET_ATTR_IFACE, + IPSET_ATTR_BYTES, + IPSET_ATTR_PACKETS, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -137,6 +139,7 @@ enum ipset_errno { IPSET_ERR_REFERENCED, IPSET_ERR_IPADDR_IPV4, IPSET_ERR_IPADDR_IPV6, + IPSET_ERR_COUNTER, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -161,6 +164,8 @@ enum ipset_cadt_flags { IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV), IPSET_FLAG_BIT_NOMATCH = 2, IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), + IPSET_FLAG_BIT_WITH_COUNTERS = 3, + IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), IPSET_FLAG_CADT_MAX = 15, /* Upper half */ }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4486285d10d..f6d878a46c4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -324,6 +324,16 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_TIMEOUT; ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } + if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { + if (!(set->extensions & IPSET_EXT_COUNTER)) + return -IPSET_ERR_COUNTER; + if (tb[IPSET_ATTR_BYTES]) + ext->bytes = be64_to_cpu(nla_get_be64( + tb[IPSET_ATTR_BYTES])); + if (tb[IPSET_ATTR_PACKETS]) + ext->packets = be64_to_cpu(nla_get_be64( + tb[IPSET_ATTR_PACKETS])); + } return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); -- cgit v1.2.3-70-g09d2 From 6e01781d1c80e2e8263471252a631e86165b15c5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Apr 2013 14:40:50 +0200 Subject: netfilter: ipset: set match: add support to match the counters The new revision of the set match supports to 
match the counters and to suppress updating the counters at matching too. At the set:list types, the updating of the subcounters can be suppressed as well. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 9 +++- include/uapi/linux/netfilter/ipset/ip_set.h | 31 +++++++++++-- include/uapi/linux/netfilter/xt_set.h | 9 ++++ net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/ipset/ip_set_list_set.c | 8 +++- net/netfilter/xt_set.c | 70 +++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0f978ebfaef..d80e2753847 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -76,7 +76,7 @@ struct ip_set; typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags); + struct ip_set_ext *mext, u32 cmdflags); /* Kernel API function options */ struct ip_set_adt_opt { @@ -217,10 +217,15 @@ ip_set_update_counter(struct ip_set_counter *counter, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { - if (ext->packets != ULLONG_MAX) { + if (ext->packets != ULLONG_MAX && + !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { ip_set_add_bytes(ext->bytes, counter); ip_set_add_packets(ext->packets, counter); } + if (flags & IPSET_FLAG_MATCH_COUNTERS) { + mext->packets = ip_set_get_packets(counter); + mext->bytes = ip_set_get_bytes(counter); + } } static inline bool diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index ed452675d15..8024cdf13b7 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -145,7 +145,7 @@ enum ipset_errno { IPSET_ERR_TYPE_SPECIFIC = 4352, }; -/* Flags at command level */ +/* Flags at command level or match/target flags, lower half of cmdattrs*/ enum ipset_cmd_flags { IPSET_FLAG_BIT_EXIST = 0, IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), @@ -153,10 +153,20 @@ enum ipset_cmd_flags { IPSET_FLAG_LIST_SETNAME = (1 << IPSET_FLAG_BIT_LIST_SETNAME), IPSET_FLAG_BIT_LIST_HEADER = 2, IPSET_FLAG_LIST_HEADER = (1 << IPSET_FLAG_BIT_LIST_HEADER), - IPSET_FLAG_CMD_MAX = 15, /* Lower half */ + IPSET_FLAG_BIT_SKIP_COUNTER_UPDATE = 3, + IPSET_FLAG_SKIP_COUNTER_UPDATE = + (1 << IPSET_FLAG_BIT_SKIP_COUNTER_UPDATE), + IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE = 4, + IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE = + (1 << IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE), + IPSET_FLAG_BIT_MATCH_COUNTERS = 5, + IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS), + IPSET_FLAG_BIT_RETURN_NOMATCH = 7, + IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH), + IPSET_FLAG_CMD_MAX = 15, }; -/* Flags at CADT attribute level */ +/* Flags at CADT attribute level, upper half of cmdattrs */ enum ipset_cadt_flags { IPSET_FLAG_BIT_BEFORE = 0, IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), @@ -166,7 +176,7 @@ enum ipset_cadt_flags { IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), IPSET_FLAG_BIT_WITH_COUNTERS = 3, IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), - IPSET_FLAG_CADT_MAX = 15, /* Upper half */ + IPSET_FLAG_CADT_MAX = 15, }; /* Commands with settype-specific attributes */ @@ -195,6 +205,7 @@ enum ip_set_dim { * If changed, new revision of iptables match/target is required. 
*/ IPSET_DIM_MAX = 6, + /* Backward compatibility: set match revision 2 */ IPSET_BIT_RETURN_NOMATCH = 7, }; @@ -207,6 +218,18 @@ enum ip_set_kopt { IPSET_RETURN_NOMATCH = (1 << IPSET_BIT_RETURN_NOMATCH), }; +enum { + IPSET_COUNTER_NONE = 0, + IPSET_COUNTER_EQ, + IPSET_COUNTER_NE, + IPSET_COUNTER_LT, + IPSET_COUNTER_GT, +}; + +struct ip_set_counter_match { + __u8 op; + __u64 value; +}; /* Interface to iptables/ip6tables */ diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h index e3a9978f259..964d3d42f87 100644 --- a/include/uapi/linux/netfilter/xt_set.h +++ b/include/uapi/linux/netfilter/xt_set.h @@ -62,4 +62,13 @@ struct xt_set_info_target_v2 { __u32 timeout; }; +/* Revision 3 match */ + +struct xt_set_info_match_v3 { + struct xt_set_info match_set; + struct ip_set_counter_match packets; + struct ip_set_counter_match bytes; + __u32 flags; +}; + #endif /*_XT_SET_H*/ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f6d878a46c4..f7713900798 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -413,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, ret = 1; } else { /* --return-nomatch: invert matched element */ - if ((opt->flags & IPSET_RETURN_NOMATCH) && + if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) && (set->type->features & IPSET_TYPE_NOMATCH) && (ret > 0 || ret == -ENOTEMPTY)) ret = -ret; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index c09022e3740..979b8c90e42 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -84,9 +84,13 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i; + u32 i, cmdflags = opt->cmdflags; int ret; + /* Don't lookup sub-counters at all */ + opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; + if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) + opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { e = list_set_elem(map, i); if (e->id == IPSET_INVALID_ID) @@ -99,7 +103,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (SET_WITH_COUNTER(set)) ip_set_update_counter(ext_counter(e, map), ext, &opt->ext, - opt->cmdflags); + cmdflags); return ret; } } diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 636c5199ff3..31790e789e2 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -189,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + return match_set(info->match_set.index, skb, par, &opt, info->match_set.flags & IPSET_INV_MATCH); } @@ -317,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct 
sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -369,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v1_destroy, .me = THIS_MODULE }, + /* counters support: update, match */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 3, + .match = set_match_v3, + .matchsize = sizeof(struct xt_set_info_match_v3), + .checkentry = set_match_v3_checkentry, + .destroy = set_match_v3_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 3, + .match = set_match_v3, + .matchsize = sizeof(struct xt_set_info_match_v3), + .checkentry = set_match_v3_checkentry, + .destroy = set_match_v3_destroy, + .me = THIS_MODULE + }, }; static struct xt_target set_targets[] __read_mostly = { -- cgit v1.2.3-70-g09d2 From a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Apr 2013 04:58:25 +0000 Subject: netfilter: move skb_gso_segment into nfnetlink_queue module skb_gso_segment is expensive, so it would be nice if we could avoid it in the future. However, userspace needs to be prepared to receive larger-than-mtu-packets (which will also have incorrect l3/l4 checksums), so we cannot simply remove it. The plan is to add a per-queue feature flag that userspace can set when binding the queue. The problem is that in nf_queue, we only have a queue number, not the queue context/configuration settings. This patch should have no impact other than the skb_gso_segment call now being in a function that has access to the queue config data. A new size attribute in nf_queue_entry is needed so nfnetlink_queue can duplicate the entry of the gso skb when segmenting the skb while also copying the route key. The follow up patch adds switch to disable skb_gso_segment when queue config says so. 
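(Purely as an illustration of where this part of the series is headed, and not part of the patch itself: once the follow-up change below adds the NFQA_CFG_F_GSO config flag, a userspace queue listener could ask the kernel to skip skb_gso_segment and deliver the original GSO skb. The sketch below assumes libnetfilter_queue and its nfq_set_queue_flags() helper; the callback body and queue number are made up for the example, and real code would also inspect NFQA_SKB_INFO to cope with larger-than-MTU payloads and not-yet-final checksums.)

#include <arpa/inet.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <libnetfilter_queue/libnetfilter_queue.h>

/* Example callback: accept everything unconditionally. */
static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);

	if (!ph)
		return -1;
	return nfq_set_verdict(qh, ntohl(ph->packet_id), NF_ACCEPT, 0, NULL);
}

/* Hypothetical helper: bind queue 0 and opt in to unsegmented GSO skbs. */
static struct nfq_q_handle *setup_gso_queue(struct nfq_handle *h)
{
	struct nfq_q_handle *qh = nfq_create_queue(h, 0, &cb, NULL);

	if (!qh)
		return NULL;
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
	/* mask == flags: turn the GSO passthrough bit on */
	nfq_set_queue_flags(qh, NFQA_CFG_F_GSO, NFQA_CFG_F_GSO);
	return qh;
}
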
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 6 ++ net/netfilter/nf_queue.c | 96 ++-------------------- net/netfilter/nfnetlink_queue_core.c | 154 +++++++++++++++++++++++++++++++---- 3 files changed, 152 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index fb1c0be38b6..aaba4bbcdda 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -9,10 +9,13 @@ struct nf_queue_entry { struct nf_hook_ops *elem; u_int8_t pf; + u16 size; /* sizeof(entry) + saved route keys */ unsigned int hook; struct net_device *indev; struct net_device *outdev; int (*okfn)(struct sk_buff *); + + /* extra space to store route keys */ }; #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry)) @@ -27,4 +30,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); +void nf_queue_entry_release_refs(struct nf_queue_entry *entry); + #endif /* _NF_QUEUE_H */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 1d91e77ba4c..5d24b1fdb59 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void) } EXPORT_SYMBOL(nf_unregister_queue_handler); -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) +void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { /* Release those devices we held, or Alexey will kill me. */ if (entry->indev) @@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) /* Drop reference to owner of hook which queued us. */ module_put(entry->elem->owner); } +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ -static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { if (!try_module_get(entry->elem->owner)) return false; @@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) return true; } +EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int __nf_queue(struct sk_buff *skb, +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, @@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb, .indev = indev, .outdev = outdev, .okfn = okfn, + .size = sizeof(*entry) + afinfo->route_key_size, }; if (!nf_queue_entry_get_refs(entry)) { @@ -163,87 +166,6 @@ err: return status; } -#ifdef CONFIG_BRIDGE_NETFILTER -/* When called from bridge netfilter, skb->data must point to MAC header - * before calling skb_gso_segment(). Else, original MAC header is lost - * and segmented skbs will be sent to wrong destination. 
- */ -static void nf_bridge_adjust_skb_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_push(skb, skb->network_header - skb->mac_header); -} - -static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) -{ - if (skb->nf_bridge) - __skb_pull(skb, skb->network_header - skb->mac_header); -} -#else -#define nf_bridge_adjust_skb_data(s) do {} while (0) -#define nf_bridge_adjust_segmented_data(s) do {} while (0) -#endif - -int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) -{ - struct sk_buff *segs; - int err = -EINVAL; - unsigned int queued; - - if (!skb_is_gso(skb)) - return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - queuenum); - - switch (pf) { - case NFPROTO_IPV4: - skb->protocol = htons(ETH_P_IP); - break; - case NFPROTO_IPV6: - skb->protocol = htons(ETH_P_IPV6); - break; - } - - nf_bridge_adjust_skb_data(skb); - segs = skb_gso_segment(skb, 0); - /* Does not use PTR_ERR to limit the number of error codes that can be - * returned by nf_queue. For instance, callers rely on -ECANCELED to mean - * 'ignore this hook'. - */ - if (IS_ERR(segs)) - goto out_err; - queued = 0; - err = 0; - do { - struct sk_buff *nskb = segs->next; - - segs->next = NULL; - if (err == 0) { - nf_bridge_adjust_segmented_data(segs); - err = __nf_queue(segs, elem, pf, hook, indev, - outdev, okfn, queuenum); - } - if (err == 0) - queued++; - else - kfree_skb(segs); - segs = nskb; - } while (segs); - - if (queued) { - kfree_skb(skb); - return 0; - } - out_err: - nf_bridge_adjust_segmented_data(skb); - return err; -} - void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; @@ -283,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = __nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_QBITS); + err = nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ef3cdb4bfee..edbae4caf75 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -477,28 +477,13 @@ nla_put_failure: } static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, + struct nf_queue_entry *entry) { struct sk_buff *nskb; - struct nfqnl_instance *queue; int err = -ENOBUFS; __be32 *packet_id_ptr; int failopen = 0; - struct net *net = dev_net(entry->indev ? 
- entry->indev : entry->outdev); - struct nfnl_queue_net *q = nfnl_queue_pernet(net); - - /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(q, queuenum); - if (!queue) { - err = -ESRCH; - goto err_out; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - err = -EINVAL; - goto err_out; - } nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); if (nskb == NULL) { @@ -547,6 +532,141 @@ err_out: return err; } +static struct nf_queue_entry * +nf_queue_entry_dup(struct nf_queue_entry *e) +{ + struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); + if (entry) { + if (nf_queue_entry_get_refs(entry)) + return entry; + kfree(entry); + } + return NULL; +} + +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ + if (skb->nf_bridge) + __skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif + +static void free_entry(struct nf_queue_entry *entry) +{ + nf_queue_entry_release_refs(entry); + kfree(entry); +} + +static int +__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue, + struct sk_buff *skb, struct nf_queue_entry *entry) +{ + int ret = -ENOMEM; + struct nf_queue_entry *entry_seg; + + nf_bridge_adjust_segmented_data(skb); + + if (skb->next == NULL) { /* last packet, no need to copy entry */ + struct sk_buff *gso_skb = entry->skb; + entry->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry); + if (ret) + entry->skb = gso_skb; + return ret; + } + + skb->next = NULL; + + entry_seg = nf_queue_entry_dup(entry); + if (entry_seg) { + entry_seg->skb = skb; + ret = __nfqnl_enqueue_packet(net, queue, entry_seg); + if (ret) + free_entry(entry_seg); + } + return ret; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ + unsigned int queued; + struct nfqnl_instance *queue; + struct sk_buff *skb, *segs; + int err = -ENOBUFS; + struct net *net = dev_net(entry->indev ? + entry->indev : entry->outdev); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(q, queuenum); + if (!queue) + return -ESRCH; + + if (queue->copy_mode == NFQNL_COPY_NONE) + return -EINVAL; + + if (!skb_is_gso(entry->skb)) + return __nfqnl_enqueue_packet(net, queue, entry); + + skb = entry->skb; + + switch (entry->pf) { + case NFPROTO_IPV4: + skb->protocol = htons(ETH_P_IP); + break; + case NFPROTO_IPV6: + skb->protocol = htons(ETH_P_IPV6); + break; + } + + nf_bridge_adjust_skb_data(skb); + segs = skb_gso_segment(skb, 0); + /* Does not use PTR_ERR to limit the number of error codes that can be + * returned by nf_queue. For instance, callers rely on -ECANCELED to + * mean 'ignore this hook'. 
+ */ + if (IS_ERR(segs)) + goto out_err; + queued = 0; + err = 0; + do { + struct sk_buff *nskb = segs->next; + if (err == 0) + err = __nfqnl_enqueue_packet_gso(net, queue, + segs, entry); + if (err == 0) + queued++; + else + kfree_skb(segs); + segs = nskb; + } while (segs); + + if (queued) { + if (err) /* some segments are already queued */ + free_entry(entry); + kfree_skb(skb); + return 0; + } + out_err: + nf_bridge_adjust_segmented_data(skb); + return err; +} + static int nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { -- cgit v1.2.3-70-g09d2 From 7237190df8c4129241697530a4eecabdc4ecc66e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Apr 2013 04:58:26 +0000 Subject: netfilter: nfnetlink_queue: add skb info attribute Once we allow userspace to receive gso/gro packets, userspace needs to be able to determine when checksums appear to be broken, but are not. NFQA_SKB_CSUMNOTREADY means 'checksums will be fixed in kernel later, pretend they are ok'. NFQA_SKB_GSO could be used for statistics, or to determine when packet size exceeds mtu. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 7 +++++++ net/netfilter/nfnetlink_queue_core.c | 16 ++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 70ec8c2bc11..0069da37046 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -45,6 +45,7 @@ enum nfqnl_attr_type { NFQA_CT, /* nf_conntrack_netlink.h */ NFQA_CT_INFO, /* enum ip_conntrack_info */ NFQA_CAP_LEN, /* __u32 length of captured packet */ + NFQA_SKB_INFO, /* __u32 skb meta information */ __NFQA_MAX }; @@ -98,4 +99,10 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_MAX (1 << 2) +/* flags for NFQA_SKB_INFO */ +/* packet appears to have wrong checksums, but they are ok */ +#define NFQA_SKB_CSUMNOTREADY (1 << 0) +/* packet is GSO (i.e., exceeds device mtu) */ +#define NFQA_SKB_GSO (1 << 1) + #endif /* _NFNETLINK_QUEUE_H */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index edbae4caf75..d052cd6da5d 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -272,6 +272,18 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } +static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +{ + __u32 flags = 0; + + if (packet->ip_summed == CHECKSUM_PARTIAL) + flags = NFQA_SKB_CSUMNOTREADY; + if (skb_is_gso(packet)) + flags |= NFQA_SKB_GSO; + + return flags ? 
nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; +} + static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -301,6 +313,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ + nla_total_size(sizeof(u_int32_t)); /* cap_len */ if (entskb->tstamp.tv64) @@ -454,6 +467,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; + if (nfqnl_put_packet_info(skb, entskb)) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; -- cgit v1.2.3-70-g09d2 From 00bd1cc24a7dd295ee095dc50791aab6ede46c7a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Apr 2013 04:58:27 +0000 Subject: netfilter: nfnetlink_queue: avoid expensive gso segmentation and checksum fixup Userspace can now indicate that it can cope with larger-than-mtu sized packets and packets that have invalid ipv4/tcp checksums. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 3 ++- net/netfilter/nfnetlink_queue_core.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 0069da37046..a2308ae5a73 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -97,7 +97,8 @@ enum nfqnl_attr_config { /* Flags for NFQA_CFG_FLAGS */ #define NFQA_CFG_F_FAIL_OPEN (1 << 0) #define NFQA_CFG_F_CONNTRACK (1 << 1) -#define NFQA_CFG_F_MAX (1 << 2) +#define NFQA_CFG_F_GSO (1 << 2) +#define NFQA_CFG_F_MAX (1 << 3) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index d052cd6da5d..2e0e835baf7 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -327,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_PARTIAL && + if (!(queue->flags & NFQA_CFG_F_GSO) && + entskb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(entskb)) return NULL; @@ -636,7 +637,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (queue->copy_mode == NFQNL_COPY_NONE) return -EINVAL; - if (!skb_is_gso(entry->skb)) + if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb)) return __nfqnl_enqueue_packet(net, queue, entry); skb = entry->skb; -- cgit v1.2.3-70-g09d2 From eee1d5a14780b9391ec51f3feaf4cffb521ddbb1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 19 Apr 2013 10:54:58 +0900 Subject: sctp: Correct type and usage of sctp_end_cksum() Change the type of the crc32 parameter of sctp_end_cksum() from __be32 to __u32 to reflect that fact that it is passed to cpu_to_le32(). There are five in-tree users of sctp_end_cksum(). The following four had warnings flagged by sparse which are no longer present with this change. net/netfilter/ipvs/ip_vs_proto_sctp.c:sctp_nat_csum() net/netfilter/ipvs/ip_vs_proto_sctp.c:sctp_csum_check() net/sctp/input.c:sctp_rcv_checksum() net/sctp/output.c:sctp_packet_transmit() The fifth user is net/netfilter/nf_nat_proto_sctp.c:sctp_manip_pkt(). 
It has been updated to pass a __u32 instead of a __be32; the value in question was already calculated in CPU byte order. net/netfilter/nf_nat_proto_sctp.c:sctp_manip_pkt() has also been updated to assign the return value of sctp_end_cksum() directly to a variable of type __le32, matching the type of the return value. Previously the return value was assigned to a variable of type __be32 and then that variable was finally assigned to another variable of type __le32. Problems flagged by sparse. Compile and sparse tested only. Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/sctp/checksum.h | 2 +- net/netfilter/nf_nat_proto_sctp.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index befc8d2a1b9..5a2110d3176 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -77,7 +77,7 @@ static inline __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32) return sctp_crc32c(crc32, buffer, length); } -static inline __le32 sctp_end_cksum(__be32 crc32) +static inline __le32 sctp_end_cksum(__u32 crc32) { return cpu_to_le32(~crc32); } diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index e64faa5ca89..396e55d46f9 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb, { struct sk_buff *frag; sctp_sctphdr_t *hdr; - __be32 crc32; + __u32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -55,8 +55,7 @@ sctp_manip_pkt(struct sk_buff *skb, skb_walk_frags(skb, frag) crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), crc32); - crc32 = sctp_end_cksum(crc32); - hdr->checksum = crc32; + hdr->checksum = sctp_end_cksum(crc32); return true; } -- cgit v1.2.3-70-g09d2 From aebda156a570782a86fc4426842152237a19427d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Apr 2013 05:58:52 +0000 Subject: net: defer net_secret[] initialization Instead of feeding net_secret[] at boot time, defer the init to the point where the first socket is created. This permits some platforms to use better entropy sources than the ones available at boot time. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/secure_seq.h | 1 + net/core/secure_seq.c | 4 +--- net/ipv4/af_inet.c | 5 ++++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index c2e542b27a5..6ca975bebd3 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,6 +3,7 @@ #include +extern void net_secret_init(void); extern __u32 secure_ip_id(__be32 daddr); extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index e61a8bb7fce..6a2f13cee86 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -12,12 +12,10 @@ static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init net_secret_init(void) +void net_secret_init(void) { get_random_bytes(net_secret, sizeof(net_secret)); - return 0; } -late_initcall(net_secret_init); #ifdef CONFIG_INET static u32 seq_scale(u32 seq) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93824c57b10..c61b3bb87a1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -114,6 +114,7 @@ #include #include #include +#include #ifdef CONFIG_IP_MROUTE #include #endif @@ -262,8 +263,10 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + net_secret_init(); + } } EXPORT_SYMBOL(build_ehash_secret); -- cgit v1.2.3-70-g09d2 From 6a5dc9e598fe90160fee7de098fa319665f5253e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Apr 2013 08:39:56 +0000 Subject: net: Add MIB counters for checksum errors Add MIB counters for checksum errors in IP layer, and TCP/UDP/ICMP layers, to help diagnose problems. $ nstat -a | grep Csum IcmpInCsumErrors 72 0.0 TcpInCsumErrors 382 0.0 UdpInCsumErrors 463221 0.0 Icmp6InCsumErrors 75 0.0 Udp6InCsumErrors 173442 0.0 IpExtInCsumErrors 10884 0.0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/uapi/linux/snmp.h | 5 +++++ net/ipv4/icmp.c | 4 +++- net/ipv4/ip_input.c | 4 +++- net/ipv4/proc.c | 10 +++++++--- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 14 ++++++++++---- net/ipv4/tcp_output.c | 3 ++- net/ipv4/udp.c | 11 +++++++++-- net/ipv6/icmp.c | 4 +++- net/ipv6/proc.c | 4 ++++ net/ipv6/tcp_ipv6.c | 19 ++++++++++--------- net/ipv6/udp.c | 21 +++++++++++++++------ 12 files changed, 72 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index fefdec91c68..df2e8b4f9c0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -50,6 +50,7 @@ enum IPSTATS_MIB_OUTMCASTOCTETS, /* OutMcastOctets */ IPSTATS_MIB_INBCASTOCTETS, /* InBcastOctets */ IPSTATS_MIB_OUTBCASTOCTETS, /* OutBcastOctets */ + IPSTATS_MIB_CSUMERRORS, /* InCsumErrors */ __IPSTATS_MIB_MAX }; @@ -87,6 +88,7 @@ enum ICMP_MIB_OUTTIMESTAMPREPS, /* OutTimestampReps */ ICMP_MIB_OUTADDRMASKS, /* OutAddrMasks */ ICMP_MIB_OUTADDRMASKREPS, /* OutAddrMaskReps */ + ICMP_MIB_CSUMERRORS, /* InCsumErrors */ __ICMP_MIB_MAX }; @@ -103,6 +105,7 @@ enum ICMP6_MIB_INERRORS, /* InErrors */ ICMP6_MIB_OUTMSGS, /* OutMsgs */ ICMP6_MIB_OUTERRORS, /* OutErrors */ + ICMP6_MIB_CSUMERRORS, /* InCsumErrors */ __ICMP6_MIB_MAX }; @@ -130,6 +133,7 @@ enum TCP_MIB_RETRANSSEGS, /* RetransSegs */ TCP_MIB_INERRS, /* InErrs */ TCP_MIB_OUTRSTS, /* OutRsts */ + TCP_MIB_CSUMERRORS, /* InCsumErrors */ __TCP_MIB_MAX }; @@ -147,6 +151,7 @@ enum UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ + UDP_MIB_CSUMERRORS, /* InCsumErrors */ __UDP_MIB_MAX }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3ac5dff7962..76e10b47e05 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto error; + goto csum_error; } if (!pskb_pull(skb, sizeof(*icmph))) @@ -929,6 +929,8 @@ int icmp_rcv(struct sk_buff *skb) drop: kfree_skb(skb); return 0; +csum_error: + ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); error: ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto drop; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2bdf802e28e..3da817b89e9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -419,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - goto inhdr_error; + goto csum_error; len = ntohs(iph->tot_len); if (skb->len < len) { @@ -446,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); +csum_error: + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); inhdr_error: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6da51d55d03..2a5bf86d241 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), + SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = { SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("InErrs", 
TCP_MIB_INERRS), SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS), + SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -322,15 +325,16 @@ static void icmp_put(struct seq_file *seq) struct net *net = seq->private; atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; - seq_puts(seq, "\nIcmp: InMsgs InErrors"); + seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " OutMsgs OutErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); - seq_printf(seq, "\nIcmp: %lu %lu", + seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index aafd052865b..08bbe609652 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5273,6 +5273,7 @@ step5: return 0; csum_error: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2278669b1d8..8ea97516459 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1866,6 +1866,7 @@ discard: return 0; csum_err: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } @@ -1985,7 +1986,7 @@ int tcp_v4_rcv(struct sk_buff *skb) * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) - goto bad_packet; + goto csum_error; th = tcp_hdr(skb); iph = ip_hdr(skb); @@ -2051,6 +2052,8 @@ no_tcp_socket: goto discard_it; if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { +csum_error: + TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { @@ -2072,10 +2075,13 @@ do_time_wait: goto discard_it; } - if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + if (skb->len < (th->doff << 2)) { inet_twsk_put(inet_twsk(sk)); - goto discard_it; + goto bad_packet; + } + if (tcp_checksum_complete(skb)) { + inet_twsk_put(inet_twsk(sk)); + goto csum_error; } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b735c23a961..536d40929ba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -80,8 +80,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { tcp_rearm_rto(sk); + } } /* SND.NXT, if window was not shrunk. 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2722db024a0..3159d16441d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1131,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, + IS_UDPLITE(sk)); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); @@ -1286,8 +1288,10 @@ out: csum_copy_err: slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) + if (!skb_kill_datagram(sk, skb, flags)) { + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + } unlock_sock_fast(sk, slow); if (noblock) @@ -1513,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) - goto drop; + goto csum_error; if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -1533,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return rc; +csum_error: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); @@ -1749,6 +1755,7 @@ csum_error: proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); + UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2a53a790514..b4ff0a42b8c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -699,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (__skb_checksum_complete(skb)) { LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", saddr, daddr); - goto discard_it; + goto csum_error; } } @@ -785,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb) kfree_skb(skb); return 0; +csum_error: + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bbbe53a99b5..115cc58898f 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), + SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = { SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), + SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; @@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("UdpLite6SndbufErrors", 
UDP_MIB_SNDBUFERRORS), + SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e51bd1a5826..71167069b39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1405,6 +1405,7 @@ discard: kfree_skb(skb); return 0; csum_err: + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1466,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) goto discard_it; if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) - goto bad_packet; + goto csum_error; th = tcp_hdr(skb); hdr = ipv6_hdr(skb); @@ -1530,6 +1531,8 @@ no_tcp_socket: goto discard_it; if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { +csum_error: + TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { @@ -1537,11 +1540,6 @@ bad_packet: } discard_it: - - /* - * Discard frame - */ - kfree_skb(skb); return 0; @@ -1555,10 +1553,13 @@ do_time_wait: goto discard_it; } - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(net, TCP_MIB_INERRS); + if (skb->len < (th->doff<<2)) { inet_twsk_put(inet_twsk(sk)); - goto discard_it; + goto bad_packet; + } + if (tcp_checksum_complete(skb)) { + inet_twsk_put(inet_twsk(sk)); + goto csum_error; } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index da6019b6373..d4defdd4493 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,12 +483,17 @@ out: csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { - if (is_udp4) + if (is_udp4) { + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - else + } else { + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + } } unlock_sock_fast(sk, slow); @@ -637,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) - goto drop; + goto csum_error; } if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -656,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_unlock_sock(sk); return rc; +csum_error: + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); @@ -817,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, } if (udp6_csum_init(skb, uh, proto)) - goto discard; + goto csum_error; /* * Multicast receive code @@ -850,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto discard; if (udp_lib_checksum_complete(skb)) - goto discard; + goto csum_error; UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); @@ -867,7 +874,9 @@ short_packet: skb->len, daddr, ntohs(uh->dest)); - + goto discard; +csum_error: + UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); -- cgit v1.2.3-70-g09d2 From bd7c4b604a6cd707803c7c6ba142bfa131f9a9f3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 30 Apr 2013 05:35:05 +0000 Subject: netpoll: convert mutex into a semaphore Bart Van Assche recently reported a warning to me: [] 
warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] mutex_trylock+0x16d/0x180 [] netpoll_poll_dev+0x49/0xc30 [] ? __alloc_skb+0x82/0x2a0 [] netpoll_send_skb_on_dev+0x265/0x410 [] netpoll_send_udp+0x28a/0x3a0 [] ? write_msg+0x53/0x110 [netconsole] [] write_msg+0xcf/0x110 [netconsole] [] call_console_drivers.constprop.17+0xa1/0x1c0 [] console_unlock+0x2d6/0x450 [] vprintk_emit+0x1ee/0x510 [] printk+0x4d/0x4f [] scsi_print_command+0x7d/0xe0 [scsi_mod] This resulted from my commit ca99ca14c which introduced a mutex_trylock operation in a path that could execute in interrupt context. When mutex debugging is enabled, the above warns the user when we are in fact executing in interrupt context. After some discussion, it seems that a semaphore is the proper mechanism to use here. While mutexes are defined to be unusable in interrupt context, no such condition exists for semaphores (save for the fact that the non-blocking API calls, like up() and down_trylock(), must be used when in irq context). Signed-off-by: Neil Horman Reported-by: Bart Van Assche CC: Bart Van Assche CC: David Miller CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- net/core/netpoll.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 9d7d8c64f7c..fa2cb76a702 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -40,7 +40,7 @@ struct netpoll_info { unsigned long rx_flags; spinlock_t rx_lock; - struct mutex dev_lock; + struct semaphore dev_lock; struct list_head rx_np; /* netpolls that registered an rx_hook */ struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 209d84253dd..a5802a8b652 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -206,17 +206,17 @@ static void netpoll_poll_dev(struct net_device *dev) * the dev_open/close paths use this to block netpoll activity * while changing device state */ - if (!mutex_trylock(&ni->dev_lock)) + if (!down_trylock(&ni->dev_lock)) return; if (!netif_running(dev)) { - mutex_unlock(&ni->dev_lock); + up(&ni->dev_lock); return; } ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { - mutex_unlock(&ni->dev_lock); + up(&ni->dev_lock); return; } @@ -225,7 +225,7 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); - mutex_unlock(&ni->dev_lock); + up(&ni->dev_lock); if (dev->flags & IFF_SLAVE) { if (ni) { @@ -255,7 +255,7 @@ int netpoll_rx_disable(struct net_device *dev) idx = srcu_read_lock(&netpoll_srcu); ni = srcu_dereference(dev->npinfo, &netpoll_srcu); if (ni) - mutex_lock(&ni->dev_lock); + down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); return 0; } @@ -267,7 +267,7 @@ void netpoll_rx_enable(struct net_device *dev) rcu_read_lock(); ni = rcu_dereference(dev->npinfo); if (ni) - mutex_unlock(&ni->dev_lock); + up(&ni->dev_lock); rcu_read_unlock(); } EXPORT_SYMBOL(netpoll_rx_enable); @@ -1047,7 +1047,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - mutex_init(&npinfo->dev_lock); + sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); -- cgit v1.2.3-70-g09d2 From 60bc851ae59bfe99be6ee89d6bc50008c85ec75d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2013 05:24:03 +0000
Subject: af_unix: fix a fatal race with bit fields Using bit fields is dangerous on ppc64/sparc64, as the compiler [1] uses 64bit instructions to manipulate them. If the 64bit word includes any atomic_t or spinlock_t, we can lose critical concurrent changes. This is happening in af_unix, where unix_sk(sk)->gc_candidate/ gc_maybe_cycle/lock share the same 64bit word. This leads to fatal deadlock, as one/several cpus spin forever on a spinlock that will never be available again. A safer way would be to use a long to store flags. This way we are sure compiler/arch won't do bad things. As we own unix_gc_lock spinlock when clearing or setting bits, we can use the non-atomic __set_bit()/__clear_bit(). recursion_level can share the same 64bit location with the spinlock, as it is set only with this spinlock held. [1] bug fixed in gcc-4.8.0 : http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080 Reported-by: Ambrose Feinstein Signed-off-by: Eric Dumazet Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: David S. Miller --- include/net/af_unix.h | 5 +++-- net/unix/garbage.c | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a8836e8445c..dbdfd2b0f3b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -57,9 +57,10 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned int gc_candidate : 1; - unsigned int gc_maybe_cycle : 1; unsigned char recursion_level; + unsigned long gc_flags; +#define UNIX_GC_CANDIDATE 0 +#define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d0f6545b001..9bc73f87f64 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), * have been added to the queues after * starting the garbage collection */ - if (u->gc_candidate) { + if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; func(u); } @@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u) * of the list, so that it's checked even if it was already * passed over */ - if (u->gc_maybe_cycle) + if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags)) list_move_tail(&u->link, &gc_candidates); } @@ -315,8 +315,8 @@ void unix_gc(void) BUG_ON(total_refs < inflight_refs); if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); - u->gc_candidate = 1; - u->gc_maybe_cycle = 1; + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); + __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); } } @@ -344,7 +344,7 @@ void unix_gc(void) if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &not_cycle_list); - u->gc_maybe_cycle = 0; + __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); } } @@ -356,7 +356,7 @@ void unix_gc(void) */ while (!list_empty(&not_cycle_list)) { u = list_entry(not_cycle_list.next, struct unix_sock, link); - u->gc_candidate = 0; + __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags); list_move_tail(&u->link, &gc_inflight_list); } -- cgit v1.2.3-70-g09d2 From 20074f357da4a637430aec2879c9d864c5d2c23c Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 1 May 2013 16:24:08 -0400 Subject: filter: fix va_list build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following build error.
In file included from include/linux/filter.h:52:0, from arch/arm/net/bpf_jit_32.c:14: include/linux/printk.h:54:2: error: unknown type name ‘va_list’ include/linux/printk.h:105:21: error: unknown type name ‘va_list’ include/linux/printk.h:108:30: error: unknown type name ‘va_list’ Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index d1248f401a5..c050dcc322a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -48,6 +48,7 @@ extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); #ifdef CONFIG_BPF_JIT +#include #include #include -- cgit v1.2.3-70-g09d2
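A minimal userspace sketch of how the NFQA_SKB_INFO attribute and the NFQA_CFG_F_GSO queue flag from the nfnetlink_queue commits above might be consumed. It assumes the application has already split one NFQNL_MSG_PACKET message into an attrs[] table with its own netlink parser; read_skb_info() and struct pkt_meta are illustrative names, not part of any library.

#include <stdint.h>
#include <arpa/inet.h>                       /* ntohl() */
#include <linux/netfilter/nfnetlink_queue.h> /* NFQA_SKB_INFO, NFQA_SKB_* flags */

struct pkt_meta {
	int csum_not_ready;	/* kernel will complete the checksums later */
	int is_gso;		/* packet is larger than the device MTU */
};

/* attrs[] holds a pointer to the payload of each NFQA_* attribute of one
 * packet message, or NULL if the attribute was absent. */
static void read_skb_info(const void *attrs[__NFQA_MAX], struct pkt_meta *m)
{
	uint32_t flags = 0;

	/* NFQA_SKB_INFO is optional: it is only emitted when a flag is set. */
	if (attrs[NFQA_SKB_INFO])
		flags = ntohl(*(const uint32_t *)attrs[NFQA_SKB_INFO]);

	m->csum_not_ready = !!(flags & NFQA_SKB_CSUMNOTREADY);
	m->is_gso = !!(flags & NFQA_SKB_GSO);
}

A queue that does not opt in by setting NFQA_CFG_F_GSO in its configuration keeps the old behaviour: the kernel segments GSO packets and completes partial checksums before queueing them, at the cost described in the commit message.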
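A sketch of the sctp_end_cksum() calling convention after the type change above, following the same pattern as the patched sctp_manip_pkt(). The surrounding function is illustrative and assumes the SCTP header sits at hdroff within the skb and that sctp_start_cksum() is available alongside the helpers shown in the diff; the running CRC stays a plain __u32 in CPU byte order, and only the final fold produces the little-endian on-wire value.

/* Recompute the SCTP CRC32c after the transport header has been rewritten. */
static void example_sctp_refresh_csum(struct sk_buff *skb, unsigned int hdroff)
{
	sctp_sctphdr_t *hdr = (sctp_sctphdr_t *)(skb->data + hdroff);
	struct sk_buff *frag;
	__u32 crc32;	/* CPU byte order until the final fold */

	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
	skb_walk_frags(skb, frag)
		crc32 = sctp_update_cksum((u8 *)frag->data,
					  skb_headlen(frag), crc32);

	/* sctp_end_cksum() now takes a __u32 and returns the __le32 wire value. */
	hdr->checksum = sctp_end_cksum(crc32);
}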
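The net_secret[] change above reuses the lazy, once-only initialization idiom that build_ehash_secret() already follows in the same hunk. A condensed sketch of that idiom, with example_secret standing in for whichever per-boot secret should be seeded on first use rather than at boot; only net_secret_init() comes from the patch itself.

static u32 example_secret;	/* 0 means "not seeded yet" */

static void example_secret_ensure_init(void)
{
	u32 rnd;

	if (likely(example_secret))
		return;		/* already seeded by an earlier caller */

	do {
		get_random_bytes(&rnd, sizeof(rnd));
	} while (rnd == 0);	/* keep 0 reserved as the "uninitialized" marker */

	/* Only the first caller to swap in a value performs the one-time work;
	 * net_secret_init() is the helper exported by the patch above. */
	if (cmpxchg(&example_secret, 0, rnd) == 0)
		net_secret_init();
}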
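A small sketch of the semaphore API that the netpoll conversion above relies on, using example_* names that are not part of netpoll itself. The point made in the commit message is that only the non-blocking calls are safe from irq context; note also that down_trylock() returns 0 when the semaphore was acquired, the inverse of mutex_trylock()'s convention.

#include <linux/types.h>
#include <linux/semaphore.h>

static struct semaphore example_lock;

static void example_init(void)
{
	sema_init(&example_lock, 1);	/* count of 1: behaves like a mutex */
}

/* May be called where a mutex must not be taken (e.g. irq context),
 * because neither operation below ever sleeps. */
static bool example_try_work(void)
{
	if (down_trylock(&example_lock))
		return false;		/* non-zero means "not acquired" */

	/* ... protected work ... */

	up(&example_lock);
	return true;
}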
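Finally, a sketch of the bit-field-to-flags-word pattern applied in the af_unix fix above, shown on a hypothetical structure (example_obj and its bit names are illustrative). Packing the one-bit fields into their own unsigned long keeps the compiler from emitting wide read-modify-write accesses that could clobber a neighbouring spinlock, and the non-atomic __set_bit()/__clear_bit() variants stay safe because every writer holds the same lock.

#include <linux/bitops.h>
#include <linux/spinlock.h>

struct example_obj {
	spinlock_t	lock;		/* protects the flag bits below */
	unsigned long	flags;		/* replaces adjacent ":1" bit fields */
#define EXAMPLE_CANDIDATE	0
#define EXAMPLE_MAYBE_CYCLE	1
};

static void example_mark_candidate(struct example_obj *obj)
{
	spin_lock(&obj->lock);
	/* Non-atomic bit ops suffice: all writers serialize on obj->lock. */
	__set_bit(EXAMPLE_CANDIDATE, &obj->flags);
	__set_bit(EXAMPLE_MAYBE_CYCLE, &obj->flags);
	spin_unlock(&obj->lock);
}

static void example_clear_candidate(struct example_obj *obj)
{
	spin_lock(&obj->lock);
	__clear_bit(EXAMPLE_CANDIDATE, &obj->flags);
	spin_unlock(&obj->lock);
}

static int example_is_candidate(const struct example_obj *obj)
{
	return test_bit(EXAMPLE_CANDIDATE, &obj->flags);
}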